1 DATA

1.1 Pop sive over time dataset

data <- read.table(file=here::here("data", "BE_Census_Data.csv"), sep=",", header=TRUE)
  
head(data)
##    Block       Old_Label                  Label Population Genetic_Diversity
## 1 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High1      High1              High
## 2 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High2      High2              High
## 3 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High3      High3              High
## 4 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High4      High4              High
## 5 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High5      High5              High
## 6 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High6      High6              High
##   Generation_Parents Generation_Eggs Date_Census Date_Start_Eggs Date_End_Eggs
## 1                  0               1     2/17/21         2/17/21       2/18/21
## 2                  0               1     2/17/21         2/17/21       2/18/21
## 3                  0               1     2/17/21         2/17/21       2/18/21
## 4                  0               1     2/17/21         2/17/21       2/18/21
## 5                  0               1     2/17/21         2/17/21       2/18/21
## 6                  0               1     2/17/21         2/17/21       2/18/21
##   Image_Box1 Image_Box2 MC_Box1 MC_Box2 MC_Total_Adults HC_Box1 HC_Box2
## 1   DSC_0155       <NA>      NA      NA           101.9      NA      NA
## 2   DSC_0156       <NA>      NA      NA           104.7      NA      NA
## 3   DSC_0157       <NA>      NA      NA           105.8      NA      NA
## 4   DSC_0158       <NA>      NA      NA           101.9      NA      NA
## 5   DSC_0159       <NA>      NA      NA           102.2      NA      NA
## 6   DSC_0160       <NA>      NA      NA           101.7      NA      NA
##   HC_Total_Adults Nb_parents Growth_rate First_Throw Extinction_finalstatus
## 1             100         NA          NA          no                     no
## 2             100         NA          NA          no                     no
## 3             100         NA          NA          no                     no
## 4             100         NA          NA          no                     no
## 5             100         NA          NA          no                     no
## 6             100         NA          NA          no                     no
##   Less_than_5
## 1          no
## 2          no
## 3          no
## 4          no
## 5          no
## 6          no
tail(data)
##      Block         Old_Label                  Label Population
## 571 Block5 B5 - Backup Fam H 8/25 BE B5 | G6 Med 19      Med19
## 572 Block5 B5 - Backup Fam I 8/25 BE B5 | G6 Med 20      Med20
## 573 Block5   B5-Backup Fam A 8/25 BE B5 | G6 Med 21      Med21
## 574 Block5   B5-Backup Fam B 8/25 BE B5 | G6 Med 22      Med22
## 575 Block5   B5-Backup Fam C 8/25 BE B5 | G6 Med 23      Med23
## 576 Block5   B5-Backup Fam D 8/25 BE B5 | G6 Med 24      Med24
##     Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 571               Med                  5               6     8/25/21
## 572               Med                  5               6     8/25/21
## 573               Med                  5               6     8/25/21
## 574               Med                  5               6     8/25/21
## 575               Med                  5               6     8/25/21
## 576               Med                  5               6     8/25/21
##     Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2  MC_Box1  MC_Box2
## 571         8/25/21       8/26/21   DSC_0273   DSC_0274  2.00000  2.00000
## 572         8/25/21       8/26/21       <NA>       <NA>       NA       NA
## 573         8/25/21       8/26/21   DSC_0277   DSC_0278 29.76978 32.92378
## 574         8/25/21       8/26/21       <NA>       <NA>       NA       NA
## 575         8/25/21       8/26/21   DSC_0267   DSC_0268 25.31021 46.10092
## 576         8/25/21       8/26/21   DSC_0283   DSC_0284 71.09684 55.37443
##     MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 571             4.0       2       2               4          8    0.500000
## 572             0.0      NA      NA              NA          0          NA
## 573            62.7      23      23              46         35    1.314286
## 574             0.0      NA      NA              NA         NA          NA
## 575            71.4      20      36              56         40    1.400000
## 576           126.5      76      57             133         41    3.243902
##     First_Throw Extinction_finalstatus Less_than_5
## 571          no                     no         yes
## 572     extinct                    yes         yes
## 573          no                     no          no
## 574     extinct                    yes         yes
## 575          no                     no          no
## 576          no                     no          no
dim(data)
## [1] 576  23
summary(data)
##     Block            Old_Label            Label            Population       
##  Length:576         Length:576         Length:576         Length:576        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Genetic_Diversity  Generation_Parents Generation_Eggs Date_Census       
##  Length:576         Min.   :0.0        Min.   :1.0     Length:576        
##  Class :character   1st Qu.:1.0        1st Qu.:2.0     Class :character  
##  Mode  :character   Median :2.5        Median :3.5     Mode  :character  
##                     Mean   :2.5        Mean   :3.5                       
##                     3rd Qu.:4.0        3rd Qu.:5.0                       
##                     Max.   :5.0        Max.   :6.0                       
##                                                                          
##  Date_Start_Eggs    Date_End_Eggs       Image_Box1         Image_Box2       
##  Length:576         Length:576         Length:576         Length:576        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     MC_Box1          MC_Box2       MC_Total_Adults     HC_Box1      
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 26.52   1st Qu.: 25.99   1st Qu.: 51.05   1st Qu.: 22.00  
##  Median : 58.87   Median : 54.29   Median :101.50   Median : 56.50  
##  Mean   : 75.88   Mean   : 70.57   Mean   :128.37   Mean   : 72.06  
##  3rd Qu.:106.93   3rd Qu.: 99.40   3rd Qu.:170.25   3rd Qu.:101.00  
##  Max.   :327.40   Max.   :299.00   Max.   :514.40   Max.   :288.00  
##  NA's   :142      NA's   :141      NA's   :5        NA's   :144     
##     HC_Box2       HC_Total_Adults   Nb_parents      Growth_rate    
##  Min.   :  0.00   Min.   :  0.0   Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 25.00   1st Qu.: 62.0   1st Qu.: 67.25   1st Qu.:0.4183  
##  Median : 52.00   Median :100.0   Median :100.00   Median :0.9386  
##  Mean   : 68.08   Mean   :131.8   Mean   :138.30   Mean   :1.4482  
##  3rd Qu.: 96.75   3rd Qu.:169.0   3rd Qu.:188.00   3rd Qu.:2.3587  
##  Max.   :290.00   Max.   :499.0   Max.   :499.00   Max.   :6.8571  
##  NA's   :142      NA's   :43      NA's   :122      NA's   :142     
##  First_Throw        Extinction_finalstatus Less_than_5       
##  Length:576         Length:576             Length:576        
##  Class :character   Class :character       Class :character  
##  Mode  :character   Mode  :character       Mode  :character  
##                                                              
##                                                              
##                                                              
## 
#Remove populations killed due to the pathogen
data_status  <- data.frame(data$Population,data$Extinction_finalstatus)
data_kill<-data[data$Extinction_finalstatus=="kill",]
dim(data_kill)
## [1] 72 23
data<-data[data$Extinction_finalstatus!="kill",]

#Add pop growth rate
data$Nb_adults <- data$HC_Total_Adults
data$Lambda <- data$Nb_adults / data$Nb_parents
data$obs<-as.factor(1:nrow(data))
#summary(data)
#Remove 

#Check variables
str(data)
## 'data.frame':    504 obs. of  26 variables:
##  $ Block                 : chr  "Block3" "Block3" "Block3" "Block3" ...
##  $ Old_Label             : chr  "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
##  $ Label                 : chr  "BE B3 2/17 | G1  High1" "BE B3 2/17 | G1  High2" "BE B3 2/17 | G1  High3" "BE B3 2/17 | G1  High4" ...
##  $ Population            : chr  "High1" "High2" "High3" "High4" ...
##  $ Genetic_Diversity     : chr  "High" "High" "High" "High" ...
##  $ Generation_Parents    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Generation_Eggs       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date_Census           : chr  "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
##  $ Date_Start_Eggs       : chr  "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
##  $ Date_End_Eggs         : chr  "2/18/21" "2/18/21" "2/18/21" "2/18/21" ...
##  $ Image_Box1            : chr  "DSC_0155" "DSC_0156" "DSC_0157" "DSC_0158" ...
##  $ Image_Box2            : chr  NA NA NA NA ...
##  $ MC_Box1               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MC_Box2               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MC_Total_Adults       : num  102 105 106 102 102 ...
##  $ HC_Box1               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ HC_Box2               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ HC_Total_Adults       : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ Nb_parents            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Growth_rate           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ First_Throw           : chr  "no" "no" "no" "no" ...
##  $ Extinction_finalstatus: chr  "no" "no" "no" "no" ...
##  $ Less_than_5           : chr  "no" "no" "no" "no" ...
##  $ Nb_adults             : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ Lambda                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ obs                   : Factor w/ 504 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
data$Genetic_Diversity <- as.factor(data$Genetic_Diversity)
data$Block <- as.factor(data$Block)
data$Population <- as.factor(data$Population)
data$Extinction_finalstatus <- as.factor(data$Extinction_finalstatus)

#Order levels 
data$Genetic_Diversity <- plyr::revalue(data$Genetic_Diversity, c("Med"="Medium"))
data$Genetic_Diversity <- factor(data$Genetic_Diversity, levels = c("High", "Medium", "Low"))
levels(data$Genetic_Diversity)
## [1] "High"   "Medium" "Low"
data<-droplevels(data) #remove previous levels 
str(data)
## 'data.frame':    504 obs. of  26 variables:
##  $ Block                 : Factor w/ 3 levels "Block3","Block4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Old_Label             : chr  "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
##  $ Label                 : chr  "BE B3 2/17 | G1  High1" "BE B3 2/17 | G1  High2" "BE B3 2/17 | G1  High3" "BE B3 2/17 | G1  High4" ...
##  $ Population            : Factor w/ 84 levels "High1","High13",..: 1 9 19 24 25 26 27 28 39 49 ...
##  $ Genetic_Diversity     : Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 3 3 3 ...
##  $ Generation_Parents    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Generation_Eggs       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date_Census           : chr  "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
##  $ Date_Start_Eggs       : chr  "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
##  $ Date_End_Eggs         : chr  "2/18/21" "2/18/21" "2/18/21" "2/18/21" ...
##  $ Image_Box1            : chr  "DSC_0155" "DSC_0156" "DSC_0157" "DSC_0158" ...
##  $ Image_Box2            : chr  NA NA NA NA ...
##  $ MC_Box1               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MC_Box2               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MC_Total_Adults       : num  102 105 106 102 102 ...
##  $ HC_Box1               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ HC_Box2               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ HC_Total_Adults       : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ Nb_parents            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Growth_rate           : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ First_Throw           : chr  "no" "no" "no" "no" ...
##  $ Extinction_finalstatus: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Less_than_5           : chr  "no" "no" "no" "no" ...
##  $ Nb_adults             : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ Lambda                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ obs                   : Factor w/ 504 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
head(data)
##    Block       Old_Label                  Label Population Genetic_Diversity
## 1 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High1      High1              High
## 2 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High2      High2              High
## 3 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High3      High3              High
## 4 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High4      High4              High
## 5 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High5      High5              High
## 6 Block3 B3_All_Red_Mix  BE B3 2/17 | G1  High6      High6              High
##   Generation_Parents Generation_Eggs Date_Census Date_Start_Eggs Date_End_Eggs
## 1                  0               1     2/17/21         2/17/21       2/18/21
## 2                  0               1     2/17/21         2/17/21       2/18/21
## 3                  0               1     2/17/21         2/17/21       2/18/21
## 4                  0               1     2/17/21         2/17/21       2/18/21
## 5                  0               1     2/17/21         2/17/21       2/18/21
## 6                  0               1     2/17/21         2/17/21       2/18/21
##   Image_Box1 Image_Box2 MC_Box1 MC_Box2 MC_Total_Adults HC_Box1 HC_Box2
## 1   DSC_0155       <NA>      NA      NA           101.9      NA      NA
## 2   DSC_0156       <NA>      NA      NA           104.7      NA      NA
## 3   DSC_0157       <NA>      NA      NA           105.8      NA      NA
## 4   DSC_0158       <NA>      NA      NA           101.9      NA      NA
## 5   DSC_0159       <NA>      NA      NA           102.2      NA      NA
## 6   DSC_0160       <NA>      NA      NA           101.7      NA      NA
##   HC_Total_Adults Nb_parents Growth_rate First_Throw Extinction_finalstatus
## 1             100         NA          NA          no                     no
## 2             100         NA          NA          no                     no
## 3             100         NA          NA          no                     no
## 4             100         NA          NA          no                     no
## 5             100         NA          NA          no                     no
## 6             100         NA          NA          no                     no
##   Less_than_5 Nb_adults Lambda obs
## 1          no       100     NA   1
## 2          no       100     NA   2
## 3          no       100     NA   3
## 4          no       100     NA   4
## 5          no       100     NA   5
## 6          no       100     NA   6
tail(data)
##      Block         Old_Label                  Label Population
## 571 Block5 B5 - Backup Fam H 8/25 BE B5 | G6 Med 19      Med19
## 572 Block5 B5 - Backup Fam I 8/25 BE B5 | G6 Med 20      Med20
## 573 Block5   B5-Backup Fam A 8/25 BE B5 | G6 Med 21      Med21
## 574 Block5   B5-Backup Fam B 8/25 BE B5 | G6 Med 22      Med22
## 575 Block5   B5-Backup Fam C 8/25 BE B5 | G6 Med 23      Med23
## 576 Block5   B5-Backup Fam D 8/25 BE B5 | G6 Med 24      Med24
##     Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 571            Medium                  5               6     8/25/21
## 572            Medium                  5               6     8/25/21
## 573            Medium                  5               6     8/25/21
## 574            Medium                  5               6     8/25/21
## 575            Medium                  5               6     8/25/21
## 576            Medium                  5               6     8/25/21
##     Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2  MC_Box1  MC_Box2
## 571         8/25/21       8/26/21   DSC_0273   DSC_0274  2.00000  2.00000
## 572         8/25/21       8/26/21       <NA>       <NA>       NA       NA
## 573         8/25/21       8/26/21   DSC_0277   DSC_0278 29.76978 32.92378
## 574         8/25/21       8/26/21       <NA>       <NA>       NA       NA
## 575         8/25/21       8/26/21   DSC_0267   DSC_0268 25.31021 46.10092
## 576         8/25/21       8/26/21   DSC_0283   DSC_0284 71.09684 55.37443
##     MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 571             4.0       2       2               4          8    0.500000
## 572             0.0      NA      NA              NA          0          NA
## 573            62.7      23      23              46         35    1.314286
## 574             0.0      NA      NA              NA         NA          NA
## 575            71.4      20      36              56         40    1.400000
## 576           126.5      76      57             133         41    3.243902
##     First_Throw Extinction_finalstatus Less_than_5 Nb_adults   Lambda obs
## 571          no                     no         yes         4 0.500000 499
## 572     extinct                    yes         yes        NA       NA 500
## 573          no                     no          no        46 1.314286 501
## 574     extinct                    yes         yes        NA       NA 502
## 575          no                     no          no        56 1.400000 503
## 576          no                     no          no       133 3.243902 504
dim(data)
## [1] 504  26
#Add variable 
data$gen_square <- data$Generation_Eggs^2




##Color code
                   # values = c("#685338", "#FEA698", "#7BC7AD")) +
                   # values = c( "#51392C","#DA5837", "#4C9E97")) +
                   # values = c( "#00AFBB", "#E7B800", "#FC4E07")) +
                  #  values = c("#9AD3CA", "#929FD1", "#E7C7D7")) +
                   #values = c("#227C9D", "#FFCB77", "#FE6D73")) +

1.2 Heterozygosity remain over time

#Upload He dataset
data_he <- read.table(file=here::here("data", "He_ReMaining_BeatricePop.csv"), sep=",", header=TRUE)
data_he$Population <- as.factor(data_he$Population)
data_he <- data_he[,c(1,3)]

#Merge dataset: add heterozygosity data to oridinal data 
data <- merge(x = data, y = data_he, by = "Population", all.x=TRUE)

### Creation of 4 He: 
  # 1- He_start: He was remaining for each population when we started the experiment 
  # 2- He_lost_gen_t: He was lost only during this specfic generation during the experiment  (considering He=1 before this generation)
  # 3- He_remain: He was remaining after each generation (He_remain[Gen=6]=He_end)
  # 4- He_exp: He was lost during the entire experiment (considering He=1 at the beginning of the exp)
  # 5- He_end: He was remaining for each population when we finished the experiment (considering the real He at the beginning of the exp, He_start, not 1)


##### 
##### 1- He_start
#####
colnames(data)[28] <- "He_start"



##### 
##### 2- He_lost_gen_t
##### 
#Add He lost each generation
data$He_lost_gen_t <- 1 - (1/(2*data$Nb_adults))
#To consider extinct populations
is.na(data$He_lost_gen_t) <- sapply (data$He_lost_gen_t, is.infinite)


##### 
##### 3- He_remain
##### 
#Create new variable He remain per generation
data$He_remain <- NA
#Initiate with gen=1: He_start * He_lost_gen1
data$He_remain[data$Generation_Eggs=="1"] <- data$He_start[data$Generation_Eggs=="1"]*
  data$He_lost_gen_t[data$Generation_Eggs=="1"]

#Compute for all the other genrations, except the first one 
for(i in levels(data$Population)){
     ifelse(sum(data$Generation_Eggs[data$Population==i])!=21,print("ERROR"),print(i))
 for(j in 2:max(data$Generation_Eggs)){
   data$He_remain[data$Population==i&data$Generation_Eggs==j] <- 
     data$He_remain[data$Population==i&data$Generation_Eggs==j-1]*
     data$He_lost_gen_t[data$Population==i&data$Generation_Eggs==j]
   }
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low16"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low27"
## [1] "Low28"
## [1] "Low29"
## [1] "Low3"
## [1] "Low30"
## [1] "Low31"
## [1] "Low32"
## [1] "Low33"
## [1] "Low34"
## [1] "Low35"
## [1] "Low36"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med14"
## [1] "Med15"
## [1] "Med17"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med20"
## [1] "Med21"
## [1] "Med22"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med5"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
##### 
##### 4- He_exp
##### 
# He at the end of the experiment 
data$He_exp <- NA 

#Compute the HE at the end of the experiment 
for(i in levels(data$Population)){
temp_data_pop <- subset(data, Population==i)
ifelse(sum(temp_data_pop$Generation_Eggs)!=21,print("ERROR"),print(i))
temp_he_remaining_gen <- 1 - (1/(2*temp_data_pop$Nb_adults))
#To consider extinct add this row: 
is.na(temp_he_remaining_gen) <- sapply (temp_he_remaining_gen, is.infinite)
#Compute for the whole experiment 
temp_he_remain_exp <- prod(temp_he_remaining_gen, na.rm = TRUE)
data$He_exp[data$Population==i] <- temp_he_remain_exp
rm(temp_he_remain_exp,temp_he_remaining_gen,temp_data_pop)
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low16"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low27"
## [1] "Low28"
## [1] "Low29"
## [1] "Low3"
## [1] "Low30"
## [1] "Low31"
## [1] "Low32"
## [1] "Low33"
## [1] "Low34"
## [1] "Low35"
## [1] "Low36"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med14"
## [1] "Med15"
## [1] "Med17"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med20"
## [1] "Med21"
## [1] "Med22"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med5"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
##### 
##### 5- He_end
##### 
# Remaining He at the end of the experiment
data$He_end <- data$He_start * data$He_exp

#Save these values for Phenotyping 
data_he <- merge(x = data_he, 
                 y = data[data$Generation_Eggs==1,
                          c("Population","He_start",
                             "He_exp","He_end")], 
                 by = "Population", all.x=TRUE)

1.3 Phenotyping dataset

#Upload growth rate end of experiment
data_phenotyping <- read.table(file=here::here("data", "Adaptation_Data.csv"), sep=",", header=TRUE)


#Factors variables 
data_phenotyping$ID_Rep <- as.factor(data_phenotyping$ID_Rep)
data_phenotyping$Week <- as.factor(data_phenotyping$Week)
data_phenotyping$Block <- as.factor(data_phenotyping$Block)
data_phenotyping$Population <- as.factor(data_phenotyping$Population)
#Revalue
data_phenotyping$Genetic_Diversity <- as.factor(data_phenotyping$Divsersity)
data_phenotyping$Genetic_Diversity <-   plyr::revalue(data_phenotyping$Genetic_Diversity, c("Med"="Medium"))
data_phenotyping$Genetic_Diversity <- factor(data_phenotyping$Genetic_Diversity, 
                                             levels = c("High", "Medium", "Low"))



#Add sum total 
data_phenotyping$Nb_ttx <- rowSums(data_phenotyping[,11:13], na.rm=TRUE)
#Proportion
data_phenotyping$p_adults <- data_phenotyping$Adults  / data_phenotyping$Nb_ttx
data_phenotyping$p_pupae <- data_phenotyping$Pupae / data_phenotyping$Nb_ttx
data_phenotyping$p_larvae <- data_phenotyping$Larvae / data_phenotyping$Nb_ttx

data_phenotyping_4week <- data_phenotyping[data_phenotyping$Week=="4-week",]
data_phenotyping <- data_phenotyping[data_phenotyping$Week=="5-week",]




#Merge with He dataset
data_phenotyping <- merge(x = data_phenotyping, 
              y = data_he, by = "Population", all.x=TRUE)
data_phenotyping_4week <- merge(x = data_phenotyping_4week, 
              y = data_he, by = "Population", all.x=TRUE)

#Clean if less than 30 parents 
pop_possible<-unique(data$Population[data$Generation_Eggs==5&data$Nb_adults>=30])
data_phenotyping <- data_phenotyping[data_phenotyping$Population %in% pop_possible,]

pop_possible<-unique(data$Population[data$Generation_Eggs==4&data$Nb_adults>=30])
data_phenotyping_4week <- data_phenotyping_4week[data_phenotyping_4week$Population %in% pop_possible,]

data_phenotyping_all <- rbind(data_phenotyping,data_phenotyping_4week)

1.4 Old He remain

# #Upload growth rate end of experiment
# data_he <- read.table(file=here::here("data", "He_ReMaining_BeatricePop.csv"), sep=",", header=TRUE)
# data_he$Population <- as.factor(data_he$Population)
# data_he <- data_he[,c(1,3)]
# 
# 
# ### Additional: He remaining 
# # New vector for the lost during exp
# data_he$He_remain_exp <- NA
# 
# # He lost during exp
# for(i in levels(data$Population)){
# temp_data_pop <- subset(data, Population==i)
# ifelse(sum(temp_data_pop$Generation_Parents)!=15,print("ERROR"),print(i))
# temp_he_remaining_gen <- 1 - (1/(2*temp_data_pop$Nb_parents))
# 
# #To consider extinct add this row: 
# is.na(temp_he_remaining_gen) <- sapply (temp_he_remaining_gen, is.infinite)
# 
# temp_he_remain_exp <- prod(temp_he_remaining_gen, na.rm = TRUE)
# data_he$He_remain_exp[data_he$Population==i] <- temp_he_remain_exp
# rm(temp_he_remain_exp,temp_he_remaining_gen,temp_data_pop)
# }
# 
# # Remaining He at the end of the experiment
# data_he$He_end <- data_he$He_remain * data_he$He_remain_exp
# 
# # Remove kill population 
# data_he <- data_he[!is.na(data_he$He_remain_exp),]
# data_he <- droplevels(data_he)
# 
# ## Merge dataset He
# data_old_merged <- merge(x = data, y = data_he, by = "Population", all.x=TRUE)

1.5 Survival data

data_survival <- data[data$Generation_Parents==0,c("Population","Block","Genetic_Diversity")]
data_survival$Gen_eggs_extinct <- max(data$Generation_Eggs)
data_survival$Survival <- "yes"

#Format data
for (i in unique(data_survival$Population)) {
  if (data$Extinction_finalstatus[data$Population==i&data$Generation_Eggs==1] == "yes" ) {
    data_survival$Gen_eggs_extinct[data_survival$Population==i]<-
      min(data$Generation_Eggs[data$Population==i&data$First_Throw=="extinct"])
    data_survival$Survival[data_survival$Population==i] <- "extinct"
  }else{
    print(i)
  }
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low29"
## [1] "Low3"
## [1] "Low34"
## [1] "Low35"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med15"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med21"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
str(data_survival)
## 'data.frame':    84 obs. of  5 variables:
##  $ Population       : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Block            : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
##  $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Gen_eggs_extinct : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ Survival         : chr  "yes" "yes" "yes" "yes" ...
data_survival$Survival <- as.factor(data_survival$Survival)
#Note: the Surv() function in the {survival} package accepts by default TRUE/FALSE, where TRUE is event and FALSE is censored; 1/0 where 1 is event and 0 is censored; or 2/1 where 2 is event and 1 is censored. Please take care to ensure the event indicator is properly formatted.
data_survival$status <- ifelse(data_survival$Survival=="extinct", 1, 0)

2 PLOT

2.1 Population size

PLOT_Pop_size<-ggplot2::ggplot(data, aes(x = factor(Generation_Eggs), 
                                y = Nb_adults,  
                                group = Population,
                                colour = Genetic_Diversity)) + 
  geom_point(position = position_dodge(0.4), size = 0.8, alpha = 0.7) +
  geom_line(position = position_dodge(0.4), size = 0.2, alpha = 0.6) +
  ylab("Population size") +
  labs(color="Genetic diversity") +
  xlab("Generation") +
  theme_LO
PLOT_Pop_size

SUM_Popsize <- Rmisc::summarySE(data, 
                        measurevar = c("Nb_adults"),
                       groupvars = c("Generation_Eggs",
                                     "Genetic_Diversity"), 
                       na.rm=TRUE)
SUM_Popsize
##    Generation_Eggs Genetic_Diversity  N Nb_adults        sd        se       ci
## 1                1              High 27 100.00000   0.00000  0.000000  0.00000
## 2                1            Medium 23 100.00000   0.00000  0.000000  0.00000
## 3                1               Low 34 100.00000   0.00000  0.000000  0.00000
## 4                2              High 27 344.29630  73.77294 14.197610 29.18360
## 5                2            Medium 23 282.04348 100.75735 21.009360 43.57075
## 6                2               Low 34 282.61765  78.53006 13.467794 27.40043
## 7                3              High 27 151.85185  80.96899 15.582489 32.03027
## 8                3            Medium 23 118.73913  88.54491 18.462891 38.28969
## 9                3               Low 34 104.61765  85.13341 14.600260 29.70445
## 10               4              High 26 114.00000  80.20224 15.728954 32.39439
## 11               4            Medium 23  62.65217  64.50483 13.450188 27.89398
## 12               4               Low 34  46.38235  39.63241  6.796903 13.82840
## 13               5              High 27 103.62963  51.56486  9.923661 20.39838
## 14               5            Medium 21  58.57143  51.11122 11.153383 23.26555
## 15               5               Low 31  51.51613  46.13377  8.285870 16.92200
## 16               6              High 27 147.66667  42.60282  8.198916 16.85311
## 17               6            Medium 19  69.73684  46.02396 10.558620 22.18284
## 18               6               Low 28  77.03571  46.14428  8.720450 17.89289
# SUM_Popsize <- Rmisc::summarySE(data[data$Extinction_finalstatus=="no",],
#                         measurevar = c("Nb_adults"),
#                        groupvars = c("Generation_Eggs",
#                                      "Genetic_Diversity"),
#                        na.rm=TRUE)
# SUM_Popsize



PLOT_Pop_size_average <- PLOT_Pop_size + 
  geom_point(data = SUM_Popsize, aes(x = factor(Generation_Eggs), 
                                y = Nb_adults, 
                                group = Genetic_Diversity,
                                colour = Genetic_Diversity), 
             size = 5, position = position_dodge(0.4)) +
  geom_errorbar(data = SUM_Popsize, aes(x = factor(Generation_Eggs), 
                                ymin = Nb_adults-ci, ymax = Nb_adults+ci, 
                                group = Genetic_Diversity,
                                colour = Genetic_Diversity), 
             size = 1, width=.2,  position = position_dodge(0.4)) +
  geom_line(data = SUM_Popsize, aes(x = factor(Generation_Eggs), 
                                y = Nb_adults, 
                                group = Genetic_Diversity,
                                colour = Genetic_Diversity), 
           linetype = "dashed", size = 1.5, position = position_dodge(0.4)) 
PLOT_Pop_size_average

# cowplot::save_plot(file = here::here("figures","PopulationSize_average.pdf"),   PLOT_Pop_size_average, base_height = 10/cm(1),           base_width = 15/cm(1), dpi = 610)






#Prediction with models 
# Test effect

# data$gen_square <- data$Generation_Eggs^2
mod1 <- lme4::glmer(Nb_adults~Generation_Eggs*Genetic_Diversity +
                      gen_square*Genetic_Diversity + 
                Block  + (1|obs), 
            data = data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",], 
            family = "poisson")



#Predict data
filldata <- data.frame(Genetic_Diversity = rep(levels(data$Genetic_Diversity),100), 
                       Generation_Eggs = rep(seq(2,6, length = 100),each=3),
                       Block = "Block4",
                       gen_square = (rep(seq(2,6, length = 100),each=3))^2,
                       Estimates = NA)

filldata$Estimates <- predict(mod1, newdata=filldata, type = "response", re.form=~0)


#Predcit estimate minimun values 
tapply(filldata$Estimates, filldata$Genetic_Diversity, min)
##     High      Low   Medium 
## 85.85198 44.16308 51.99642
filldata[filldata$Estimates>=44.16&filldata$Estimates<=44.17,]
##     Genetic_Diversity Generation_Eggs  Block gen_square Estimates
## 198               Low        4.626263 Block4   21.40231  44.16308
filldata[filldata$Estimates>=51.99&filldata$Estimates<=52.00,]
##     Genetic_Diversity Generation_Eggs  Block gen_square Estimates
## 230            Medium        5.070707 Block4   25.71207  51.99642
filldata[filldata$Estimates>=85.85&filldata$Estimates<=85.86,]
##     Genetic_Diversity Generation_Eggs  Block gen_square Estimates
## 181              High        4.424242 Block4   19.57392  85.85198
## Change name vector 
vector_names <- c(`Low` = "Strong bottleneck",
                    `Medium` = "Intermediate bottleneck",
                    `High` = "Diverse")


PLOT_Pop_size_predict<-ggplot2::ggplot(data[data$Extinction_finalstatus=="no",], 
                               aes(x = factor(Generation_Eggs), 
                                y = Nb_adults)) + 
  geom_point(aes(group = Population,
                 colour = Genetic_Diversity), 
             position = position_dodge(0.4), size = 0.7, alpha = 0.5) +
  geom_line(aes(group = Population,
                colour = Genetic_Diversity), 
            position = position_dodge(0.4), size = 0.05, alpha = 0.5) +
  geom_line(data = filldata, aes(x = Generation_Eggs, 
                                y = Estimates, 
                               colour = Genetic_Diversity), size = 1.4) + 
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  ylab("Population size") +
  xlab("Generation") +
  theme_LO
PLOT_Pop_size_predict

# 
# cowplot::save_plot(file = here::here("figures","PopulationSize_predict.pdf"),   PLOT_Pop_size_predict, base_height = 10/cm(1),           base_width = 15/cm(1), dpi = 610)
# 
# 

2.2 Extinction

#Percent of extinction
length(unique(data$Population[data$Genetic_Diversity=="Low"&
                                data$Extinction_finalstatus=="yes"]))/
  length(unique(data$Population[data$Genetic_Diversity=="Low"]))
## [1] 0.2352941
length(unique(data$Population[data$Genetic_Diversity=="Medium"&
                                data$Extinction_finalstatus=="yes"]))/
length(unique(data$Population[data$Genetic_Diversity=="Medium"]))
## [1] 0.2173913
length(unique(data$Population[data$Genetic_Diversity=="High"&
                                data$Extinction_finalstatus=="yes"]))/
  length(unique(data$Population[data$Genetic_Diversity=="High"]))
## [1] 0
#Create data with percent
vector_names <- c(`Low` = "Strong bottleneck",
                    `Medium` = "Intermediate bottleneck",
                    `High` = "Diverse")
f_labels = data.frame(Genetic_Diversity = c("High","Medium","Low"), 
                       label = c("Extinction = 0 %", "Extinction = 21.7 %", "Extinction = 23.5 %"))
f_labels$Genetic_Diversity <- factor(f_labels$Genetic_Diversity, 
                                     levels = c("High", "Medium", "Low"))






## Extinction yes no
PLOT_Extinction<-ggplot2::ggplot(data,  aes(x = factor(Generation_Eggs), 
                                y = Nb_adults)) + 
  facet_wrap(~ Genetic_Diversity, ncol=3, labeller = ggplot2::as_labeller(vector_names)) + 
  #geom_point(position = position_dodge(0.4), size = 0.4, alpha = 0.7) +
  geom_line(aes(group = Population,
                colour = Extinction_finalstatus), 
            position = position_dodge(0.1), size = 0.4, alpha = 0.8) +
  geom_text(x = 5, y = 420, 
            aes(label = label), 
            data = f_labels, 
            col="red", 
            size = 3, 
            vjust = 0) +
  scale_color_manual(values = c("black", "red")) +
  ylab("Population size") +
  xlab("Generation") +
  theme_LO + 
  theme(strip.background = element_rect(color="grey30", 
                                        fill="white", size=0.5, linetype="solid"), 
        strip.text.x = element_text(size = 10, color = "black", face = "bold"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(), 
        legend.position = "none")

PLOT_Extinction

# 
# 
# cowplot::save_plot(here::here("figures","Fig2_Extinction.pdf"),   PLOT_Extinction, base_height = 8/cm(1),
#           base_width = 20/cm(1), dpi = 610)
#

# 


PLOT_Extinction 

2.3 Growth rate G2

tapply(data[data$Generation_Eggs==2,]$Lambda,data[data$Generation_Eggs==2,]$Genetic_Diversity,mean)
##     High   Medium      Low 
## 3.442963 2.820435 2.826176
PLOT_Growth<-ggplot2::ggplot(data[data$Generation_Eggs==2,], aes(x = Genetic_Diversity, y = Lambda, 
                         colour = Genetic_Diversity)) + 
  geom_boxplot(aes(group = Genetic_Diversity),outlier.colour = NA) +
  geom_jitter(width = 0.25, size = 1, alpha = 0.8) +
  ylab("Growth rate") +
  theme(legend.position = "none") +
  xlab("Genetic diversity") +
  theme_LO
PLOT_Growth

# 
# cowplot::save_plot(here::here("figures","OldPlot_Growth_Start.pdf"),   PLOT_Growth, base_height = 10/cm(1),
#           base_width = 8/cm(1), dpi = 610)

2.4 Life stage proportion

SUM_Prop_adults<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_adults"),
                       groupvars = c("Genetic_Diversity","Week"), 
                       na.rm = TRUE)
SUM_Prop_adults
##   Genetic_Diversity   Week  N  p_adults         sd         se         ci
## 1              High 4-week 40 0.6315807 0.18084723 0.02859446 0.05783775
## 2              High 5-week 94 0.9646852 0.05234098 0.00539856 0.01072047
## 3            Medium 4-week 18 0.7261684 0.16721535 0.03941304 0.08315424
## 4            Medium 5-week 38 0.9594668 0.06046606 0.00980889 0.01987470
## 5               Low 4-week 27 0.6649974 0.19731065 0.03797245 0.07805350
## 6               Low 5-week 52 0.9538157 0.08241700 0.01142918 0.02294504
SUM_Prop_pupae<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_pupae"),
                       groupvars = c("Genetic_Diversity","Week"), 
                       na.rm = TRUE)
SUM_Prop_pupae
##   Genetic_Diversity   Week  N    p_pupae         sd          se          ci
## 1              High 4-week 40 0.31153108 0.15526911 0.024550202 0.049657471
## 2              High 5-week 94 0.01417884 0.02356334 0.002430373 0.004826238
## 3            Medium 4-week 18 0.23410139 0.15149574 0.035707888 0.075337059
## 4            Medium 5-week 38 0.01931921 0.02904804 0.004712215 0.009547854
## 5               Low 4-week 27 0.28241476 0.17824104 0.034302504 0.070509807
## 6               Low 5-week 52 0.01796037 0.02546214 0.003530964 0.007088705
SUM_Prop_larvae<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_larvae"),
                       groupvars = c("Genetic_Diversity","Week"), 
                       na.rm = TRUE)
SUM_Prop_larvae
##   Genetic_Diversity   Week  N   p_larvae         sd          se          ci
## 1              High 4-week 40 0.05688822 0.04383041 0.006930197 0.014017646
## 2              High 5-week 94 0.02113595 0.04275336 0.004409672 0.008756736
## 3            Medium 4-week 18 0.03973025 0.03784416 0.008919954 0.018819458
## 4            Medium 5-week 38 0.02121399 0.04478738 0.007265472 0.014721244
## 5               Low 4-week 27 0.05258789 0.08541162 0.016437473 0.033787710
## 6               Low 5-week 52 0.02822390 0.07073457 0.009809120 0.019692631
SUM_Prop <- data.frame(SUM_Prop_adults[,1:4],
                       p_pupae=SUM_Prop_pupae$p_pupae,
                       p_larvae=SUM_Prop_larvae$p_larvae)


SUM_Prop<-tidyr::gather(SUM_Prop,"Stage", "Proportion",4:6,-"Genetic_Diversity")
SUM_Prop
##    Genetic_Diversity   Week  N    Stage Proportion
## 1               High 4-week 40 p_adults 0.63158069
## 2               High 5-week 94 p_adults 0.96468521
## 3             Medium 4-week 18 p_adults 0.72616835
## 4             Medium 5-week 38 p_adults 0.95946680
## 5                Low 4-week 27 p_adults 0.66499735
## 6                Low 5-week 52 p_adults 0.95381574
## 7               High 4-week 40  p_pupae 0.31153108
## 8               High 5-week 94  p_pupae 0.01417884
## 9             Medium 4-week 18  p_pupae 0.23410139
## 10            Medium 5-week 38  p_pupae 0.01931921
## 11               Low 4-week 27  p_pupae 0.28241476
## 12               Low 5-week 52  p_pupae 0.01796037
## 13              High 4-week 40 p_larvae 0.05688822
## 14              High 5-week 94 p_larvae 0.02113595
## 15            Medium 4-week 18 p_larvae 0.03973025
## 16            Medium 5-week 38 p_larvae 0.02121399
## 17               Low 4-week 27 p_larvae 0.05258789
## 18               Low 5-week 52 p_larvae 0.02822390
SUM_Prop$Stage <- factor(SUM_Prop$Stage, levels = c("p_adults", "p_pupae", "p_larvae"))



# New facet label names for supp variable
supp.weeks <- c("4 week", "5 week")
names(supp.weeks) <- c("4-week", "5-week")
 


PLOT_prop <- ggplot2::ggplot(data = SUM_Prop, aes(x = Genetic_Diversity, 
                                                  y = Proportion, fill = Stage)) +
  facet_wrap(~ Week, ncol=2, 
             labeller = labeller(Week = supp.weeks)) +
  geom_bar(stat="identity") + 
  xlab("Demographic history") +
  labs(color="Stage of individuals") +
  ylab("Proportion of each stage") +
  scale_fill_manual(values= c("#619CFF","#F8766D","#00BA38"), 
                    breaks = c("p_adults", "p_pupae", "p_larvae"),
                    labels = c( "Adult", "Pupae","Larvae")) + 
  ggplot2::scale_x_discrete(breaks=c("High", "Medium", "Low"), 
                            labels=c("Diverse", 
                                     "Intermediate\nbottleneck", 
                                      "Strong \nbottleneck")) + 
  theme_LO + 
  theme(strip.background = element_rect(color="grey30", 
                                        fill="white", size=0.5, linetype="solid"), 
        strip.text.x = element_text(size = 12, color = "black", face = "bold"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
PLOT_prop

# 
# cowplot::save_plot(here::here("figures","FigS2_Life_stage_proportion.pdf"),   PLOT_prop,
#           base_height = 10/cm(1), base_width = 18/cm(1), dpi = 610)
# 
# 

2.5 Growth rate and He

# Remove pseudoreplication: average of each population
SUM_POP_He_Mean<- Rmisc::summarySE(data_phenotyping,
                            measurevar = c("Lambda"),
                            groupvars = c("Genetic_Diversity",
                                          "He_end",
                                          "Population"), 
                            na.rm=TRUE)




PLOT_He_Final <- ggplot2::ggplot(SUM_POP_He_Mean, aes(x = He_end,
                                                      y = Lambda, 
                                                      colour = Genetic_Diversity)) + 
    geom_point(size = 2, alpha = 0.8) +
    geom_errorbar(data = SUM_POP_He_Mean, aes(x = He_end, 
                                ymin = Lambda-se, ymax = Lambda+se,
                                colour = Genetic_Diversity), 
             size = 0.6, width=0, alpha = 0.8) + 
    geom_point(size = 3, alpha = 0.8) +
    ylab("Growth rate") +
    xlab("Expected heterozygosity") +
    scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07","#E7B800")) +
    labs(size="Replicates") + 
    theme_LO 
PLOT_He_Final

# 
# 
# cowplot::save_plot(here::here("figures","Heterozygosity_Growthrate.pdf"),
#                    PLOT_He_Final,
#           base_height = 12/cm(1), base_width = 16/cm(1), dpi = 610)
# 
# 



# # ALL WITHOUT PSEUDO
# SUM_POP_He_ALL<- Rmisc::summarySE(data_evolution_Lambda,
#                             measurevar = c("Lambda"),
#                        groupvars = c("Genetic_Diversity","Phenotyping", "He_remain", "He_end",
#                                      "Population"), 
#                        na.rm=TRUE)
# 
# SUM_POP_He_ALL$HE <- ifelse(SUM_POP_He_ALL$Phenotyping=="Initial",SUM_POP_He_ALL$He_remain,SUM_POP_He_ALL$He_end)
# 
# SUM_POP_He_ALL <- SUM_POP_He_ALL[!is.na(SUM_POP_He_ALL$He_end),]
# SUM_POP_He_ALL <- SUM_POP_He_ALL[SUM_POP_He_ALL$HE!="-Inf",]
# 
# 
# facet_names <- c('Initial'="Before rescue experiment",'Final'="After rescue experiment")
# 
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda, 
#                          colour = Genetic_Diversity, 
#                          shape = Phenotyping)) +
#   #facet_wrap(~ Phenotyping, ncol=1) +
#   geom_point(size = 3, alpha = 0.8) +
#   scale_shape_manual(values = c(1, 19)) + 
#   ylab("Growth rate") +
#   xlab("Remaining heterozygosity") +
#   theme_LO 
# PLOT_He_ALL
# 
# 
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda, 
#                          colour = Genetic_Diversity, 
#                          shape = Phenotyping)) +
#   facet_wrap(~ Phenotyping, ncol=1) +
#   geom_point(size = 3, alpha = 0.8) +
#   scale_shape_manual(values = c(1, 19)) + 
#   ylab("Growth rate") +
#   xlab("Remaining heterozygosity") +
#   theme_LO
# PLOT_He_ALL
# 
# 
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda,
#                          colour = Genetic_Diversity)) +
#   facet_wrap(~ Phenotyping, ncol=1) +
#   geom_point(size = 3, alpha = 0.8) +
#   ylab("Growth rate") +
#   xlab("Remaining heterozygosity") +
#   theme_LO
# PLOT_He_ALL

2.6 Lifestage and He

SUM_Prop_adults<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_adults"),
                       groupvars = c("Genetic_Diversity","Week", "Population", "He_end"), 
                       na.rm = TRUE)
colnames(SUM_Prop_adults)[6]<-"prop" 
SUM_Prop_adults$lifestage <- "adults" 


SUM_Prop_pupae<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_pupae"),
                       groupvars = c("Genetic_Diversity","Week", "Population", "He_end"), 
                       na.rm = TRUE)
colnames(SUM_Prop_pupae)[6]<-"prop" 
SUM_Prop_pupae$lifestage <- "pupae" 

SUM_Prop_larvae<- Rmisc::summarySE(data_phenotyping_all, 
                        measurevar = c("p_larvae"),
                       groupvars = c("Genetic_Diversity","Week", 
                                     "Population", "He_end"), 
                       na.rm = TRUE)
colnames(SUM_Prop_larvae)[6]<-"prop" 
SUM_Prop_larvae$lifestage <- "larvae" 


SUM_Prop_POP <- rbind(SUM_Prop_adults, SUM_Prop_pupae, SUM_Prop_larvae)



PLOT_Proportion_lifestage <- ggplot2::ggplot(SUM_Prop_POP, aes(x = He_end,
                                                      y = prop, 
                                                      colour = Genetic_Diversity, 
                                                      size = N)) + 
    facet_grid( Week ~ factor(lifestage, levels=c('larvae','pupae','adults'))) +
    geom_point(alpha = 0.8) +
    ylab("Proportion") +
    xlab("Expected heterozygosity") +
    scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) + 
    labs(size="Replicates") + 
    theme_LO + theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
PLOT_Proportion_lifestage

 # 
 # cowplot::save_plot(here::here("figures","Life_stage_proportion_He.pdf"),
 #            PLOT_Proportion_lifestage,
 #           base_height = 14/cm(1), base_width = 36/cm(1), dpi = 610)
 # 

# 

2.7 He over time

#Compute for all the other genrations, except the first one 
data$ID_extinction <- "extant"

for(i in unique(data$Population[data$Extinction_finalstatus=="yes"])){
    gen_all <- data$Generation_Eggs[data$Population==i&!is.na(data$He_remain)]
    maxgen <- max(gen_all)
    data$ID_extinction[data$Population==i&data$Generation_Eggs==maxgen] <- "willextinct" 
}

data[data$ID_extinction=="willextinct",]
##     Population  Block         Old_Label                  Label
## 205      Low16 Block4             B4-O1 7/14 BE B4 | G5 Low 16
## 273      Low27 Block5             B5-B1 6/16 BE B5 | G4 Low 27
## 278      Low28 Block5             B5_E1 5/12 BE B5 | G3 Low 28
## 299      Low30 Block5             B5-D1 6/16 BE B5 | G4 Low 30
## 303      Low31 Block5         B(2)5-P1  6/16 BE B5 | G4 Low 31
## 310      Low32 Block5         B(2)5_Q1  5/12 BE B5 | G3 Low 32
## 317      Low33 Block5         B(2)5-T1  7/21 BE B5 | G5 Low 33
## 336      Low36 Block5         B(2)5-X1  6/16 BE B5 | G4 Low 36
## 402      Med14 Block4   B4-Backup Fam L  6/9 BE B4 | G4 Med 14
## 409      Med17 Block5   B5_Backup_Fam_F 5/12 BE B5 | G3 Med 17
## 437      Med20 Block5 B5 - Backup Fam I 6/16 BE B5 | G4 Med 20
## 449      Med22 Block5   B5_Backup_Fam_B 5/12 BE B5 | G3 Med 22
## 479       Med5 Block3 B3 - Backup Fam E   7/7 BE B3 | G5 Med 5
##     Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 205               Low                  4               5     7/14/21
## 273               Low                  3               4     6/16/21
## 278               Low                  2               3     5/12/21
## 299               Low                  3               4     6/16/21
## 303               Low                  3               4     6/16/21
## 310               Low                  2               3     5/12/21
## 317               Low                  4               5     7/21/21
## 336               Low                  3               4     6/16/21
## 402            Medium                  3               4      6/9/21
## 409            Medium                  2               3     5/12/21
## 437            Medium                  3               4     6/16/21
## 449            Medium                  2               3     5/12/21
## 479            Medium                  4               5      7/7/21
##     Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2   MC_Box1   MC_Box2
## 205         7/14/21       7/15/21       <NA>   DSC_0994  0.000000  2.000000
## 273         6/16/21       6/17/21   DSC_0526   DSC_0527  5.090878  0.000000
## 278         5/12/21       5/13/21   DSC_0918   DSC_0919  3.085768  3.638847
## 299         6/16/21       6/17/21   DSC_0559       <NA>  1.383920  0.000000
## 303         6/16/21       6/17/21       <NA>   DSC_0541  0.000000  1.000000
## 310         5/12/21       5/13/21   DSC_0953   DSC_0954 88.254170 70.952124
## 317         7/21/21       7/22/21   DSC_0117   DSC_0118  1.000000  1.000000
## 336         6/16/21       6/17/21   DSC_0540       <NA>  1.000000  0.000000
## 402          6/9/21       6/10/21   DSC_0376   DSC_0377 10.974990  1.000000
## 409         5/12/21       5/13/21   DSC_0916   DSC_0917  1.543122  1.084327
## 437         6/16/21       6/17/21   DSC_0542       <NA>  1.000000  0.000000
## 449         5/12/21       5/13/21   DSC_0922   DSC_0923  9.168447  6.251069
## 479          7/7/21        7/8/21       <NA>   DSC_0830        NA        NA
##     MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 205             2.0       0       2               2          5 0.400000000
## 273             5.1       3       0               3         19 0.157894737
## 278             6.7       1       1               2        374 0.005347594
## 299             1.4       1       0               1        146 0.006849315
## 303             1.0       1       1               2        272 0.007352941
## 310           159.2      71      56             127        276 0.460144928
## 317             2.0       1       1               2          3 0.666666667
## 336             1.0       1       0               1         22 0.045454545
## 402            12.0       7       1               8        138 0.057971014
## 409             2.6       3       2               5        416 0.012019231
## 437             1.0       1       0               1         20 0.050000000
## 449            15.4      10       6              16        200 0.080000000
## 479             0.0      NA      NA               1         11 0.090909091
##     First_Throw Extinction_finalstatus Less_than_5 Nb_adults      Lambda obs
## 205          no                    yes         yes         2 0.400000000 378
## 273          no                    yes         yes         3 0.157894737 319
## 278          no                    yes         yes         2 0.005347594 236
## 299          no                    yes         yes         1 0.006849315 322
## 303          no                    yes         yes         2 0.007352941 323
## 310          no                    yes         yes       127 0.460144928 240
## 317          no                    yes         yes         2 0.666666667 409
## 336          no                    yes         yes         1 0.045454545 328
## 402          no                    yes         yes         8 0.057971014 308
## 409          no                    yes         yes         5 0.012019231 245
## 437          no                    yes         yes         1 0.050000000 332
## 449          no                    yes         yes        16 0.080000000 250
## 479          no                    yes         yes         1 0.090909091 359
##     gen_square  He_start He_lost_gen_t He_remain    He_exp    He_end
## 205         25 0.5544299     0.7500000 0.3575672 0.6449276 0.3575672
## 273         16 0.5367998     0.8333333 0.4324691 0.8056432 0.4324691
## 278          9 0.5386585     0.7500000 0.4014365 0.7452523 0.4014365
## 299         16 0.5540444     0.5000000 0.2742819 0.4950540 0.2742819
## 303         16 0.5532775     0.7500000 0.4116634 0.7440450 0.4116634
## 310          9 0.5528880     0.9960630 0.5469651 0.9892872 0.5469651
## 317         25 0.5523333     0.7500000 0.3359893 0.6083089 0.3359893
## 336         16 0.5427371     0.5000000 0.2635269 0.4855518 0.2635269
## 402         16 0.6802331     0.9375000 0.6315974 0.9285014 0.6315974
## 409          9 0.6825509     0.9000000 0.6104897 0.8944237 0.6104897
## 437         16 0.6819650     0.5000000 0.3302071 0.4841994 0.3302071
## 449          9 0.6809168     0.9687500 0.6546991 0.9614965 0.6546991
## 479         25 0.6807065     0.5000000 0.3209456 0.4714889 0.3209456
##     ID_extinction
## 205   willextinct
## 273   willextinct
## 278   willextinct
## 299   willextinct
## 303   willextinct
## 310   willextinct
## 317   willextinct
## 336   willextinct
## 402   willextinct
## 409   willextinct
## 437   willextinct
## 449   willextinct
## 479   willextinct
dim(data[data$ID_extinction=="willextinct",])
## [1] 13 33
data[data$ID_extinction=="willextinct","Population"]
##  [1] Low16 Low27 Low28 Low30 Low31 Low32 Low33 Low36 Med14 Med17 Med20 Med22
## [13] Med5 
## 84 Levels: High1 High13 High14 High15 High16 High17 High18 High19 ... Med9
data$Population[data$Genetic_Diversity=="Medium"&data$Extinction_finalstatus=="yes"]
##  [1] Med14 Med14 Med14 Med14 Med14 Med14 Med17 Med17 Med17 Med17 Med17 Med17
## [13] Med20 Med20 Med20 Med20 Med20 Med20 Med22 Med22 Med22 Med22 Med22 Med22
## [25] Med5  Med5  Med5  Med5  Med5  Med5 
## 84 Levels: High1 High13 High14 High15 High16 High17 High18 High19 ... Med9
PLOT_He_extinction <-ggplot2::ggplot(data, aes(x = factor(Generation_Eggs), 
                                y = He_remain, 
                                group = Population,
                                colour = Extinction_finalstatus, 
                                shape = ID_extinction)) + 
  geom_line(position = position_dodge(0.5), 
            size = 0.25, alpha = 0.6) +
  geom_point(position = position_dodge(0.5), size = 3,  stroke = 0.6,
             alpha = 0.8) +
  ylab("Expected heterozygozity") +
  scale_color_manual(values = c("black", "red")) +
  scale_shape_manual(values = c(21, 13), guide=FALSE) +
  labs(color="Extinct populations") +
  guides(color = guide_legend(
    override.aes=list(shape = 21, fill="white"))) +
  xlab("Generation") +
  theme_LO
PLOT_He_extinction

# #
# cowplot::save_plot(here::here("figures","FigS1_He_over_time.pdf"),
#                    PLOT_He_extinction,
#           base_height = 12/cm(1), base_width = 14/cm(1), dpi = 610)
# 

tapply(data$He_remain[data$Generation_Eggs==1], data$Genetic_Diversity[data$Generation_Eggs==1], mean)
##      High    Medium       Low 
## 0.9936078 0.6757290 0.5414677

3 ANALYSIS

3.1 Survival analysis

3.1.0.1 Basic model

library(dplyr)

################################# SURVIVAL ANALYSIS
str(data_survival)
## 'data.frame':    84 obs. of  6 variables:
##  $ Population       : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Block            : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
##  $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Gen_eggs_extinct : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ Survival         : Factor w/ 2 levels "extinct","yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ status           : num  0 0 0 0 0 0 0 0 0 0 ...
#Note: the Surv() function in the {survival} package accepts by default TRUE/FALSE, where TRUE is event and FALSE is censored; 1/0 whejere 1 is event and 0 is censored; or 2/1 where 2 is event and 1 is censored. Please take care to ensure the event indicator is properly formatted.


# The Surv() function from the {survival} package creates a survival object for use as the response in a model formula. There will be one entry for each subject that is the survival time, which is followed by a + if the subject was censored. Let’s look at the first 10 observations:
survival::Surv(data_survival$Gen_eggs_extinct, data_survival$status)
##  [1] 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+
## [26] 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6  6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 5  4  6+ 6+ 5 
## [51] 5  4  6  6+ 6+ 5  6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 5  6+ 4  6+ 6+ 6+ 5  6+ 4 
## [76] 6+ 6+ 6+ 6+ 6  6+ 6+ 6+ 6+
#Surv(data_survival_all$Gen_eggs_extinct, data_survival$status)



# The survfit() function creates survival curves using the Kaplan-Meier method based on a formula. Let’s generate the overall survival curve for the entire cohort, assign it to object s1, and look at the structure using str():
s1 <- survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival)
str(s1)
## List of 16
##  $ n        : int 84
##  $ time     : num [1:3] 4 5 6
##  $ n.risk   : num [1:3] 84 80 74
##  $ n.event  : num [1:3] 4 6 3
##  $ n.censor : num [1:3] 0 0 71
##  $ surv     : num [1:3] 0.952 0.881 0.845
##  $ std.err  : num [1:3] 0.0244 0.0401 0.0467
##  $ cumhaz   : num [1:3] 0.0476 0.1226 0.1632
##  $ std.chaz : num [1:3] 0.0238 0.0388 0.0453
##  $ type     : chr "right"
##  $ logse    : logi TRUE
##  $ conf.int : num 0.95
##  $ conf.type: chr "log"
##  $ lower    : num [1:3] 0.908 0.814 0.771
##  $ upper    : num [1:3] 0.999 0.953 0.926
##  $ call     : language survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival)
##  - attr(*, "class")= chr "survfit"
# time: the timepoints at which the curve has a step, i.e. at least one event occurred
# surv: the estimate of survival at the corresponding time



# The {ggsurvfit} package works best if you create the survfit object using the included ggsurvfit::survfit2() function, which uses the same syntax to what we saw previously with survival::survfit(). The ggsurvfit::survfit2() tracks the environment from the function call, which allows the plot to have better default values for labeling and p-value reporting.
# 

####
#### PLOT 
####
ggsurvfit::survfit2(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival) %>% 
  ggsurvfit::ggsurvfit() +
  ggplot2::labs( x = "Generation",
                 y = "Overall survival probability") + 
  ggsurvfit::add_confidence_interval()  +
  ggsurvfit::add_risktable()

summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival))
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 
##     1, data = data_survival)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     4     84       4    0.952  0.0232        0.908        0.999
##     5     80       6    0.881  0.0353        0.814        0.953
##     6     74       3    0.845  0.0395        0.771        0.926

3.1.0.2 Comparison across groups

######################################################
##### COMPARISONS ACROSS GROUPS
######################################################
# We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. There are versions that more heavily weight the early or late follow-up that could be more appropriate depending on the research question (see ?survdiff for different test options).
# 
# We get the log-rank p-value using the survdiff function. For example, we can test whether there was a difference in survival time according to the demography history in the lung data:



#Comparison 
survival::survdiff(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
## Call:
## survival::survdiff(formula = survival::Surv(Gen_eggs_extinct, 
##     status) ~ Genetic_Diversity, data = data_survival)
## 
##                           N Observed Expected (O-E)^2/E (O-E)^2/V
## Genetic_Diversity=High   27        0     4.41     4.405      7.00
## Genetic_Diversity=Medium 23        5     3.44     0.707      1.01
## Genetic_Diversity=Low    34        8     5.15     1.571      2.73
## 
##  Chisq= 7  on 2 degrees of freedom, p= 0.03
summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
                          data = data_survival))
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 
##     Genetic_Diversity, data = data_survival)
## 
##                 Genetic_Diversity=High 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genetic_Diversity=Medium 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     4     23       2    0.913  0.0588        0.805        1.000
##     5     21       2    0.826  0.0790        0.685        0.996
##     6     19       1    0.783  0.0860        0.631        0.971
## 
##                 Genetic_Diversity=Low 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     4     34       2    0.941  0.0404        0.865        1.000
##     5     32       4    0.824  0.0654        0.705        0.962
##     6     28       2    0.765  0.0727        0.635        0.921
##################
################## PLOT 
##################
plot_survival <- ggsurvfit::survfit2(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
                                     data = data_survival) %>% 
  ggsurvfit::ggsurvfit(size = 2) +
  labs( x = "Generation",
        y = "Overall survival probability") + 
  ggsurvfit::add_confidence_interval(alpha = 0.1)  + 
  ggsurvfit::add_censor_mark(size = 8, shape = "x") +
  scale_fill_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
  ylab("Survival probability")
plot_survival

#rm(plot_survival)

# cowplot::save_plot(file = here::here("figures","FigS1_Extinction_Survival_analysis.pdf"),
#                    plot_survival, base_height = 12/cm(1),  base_width = 18/cm(1), dpi = 610)


# COX representation 
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block, 
            data = data_survival)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block, 
            data = data_survival)
anova(s1, s2)
## Analysis of Deviance Table
##  Cox model: response is  survival::Surv(Gen_eggs_extinct, status)
##  Model 1: ~ Genetic_Diversity + Block
##  Model 2: ~ Block
##    loglik  Chisq Df Pr(>|Chi|)   
## 1 -43.520                        
## 2 -49.654 12.268  2   0.002168 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival) %>% 
  gtsummary::tbl_regression(exp = TRUE) 
Characteristic HR1 95% CI1 p-value
Genetic_Diversity
High — —
Medium 347,496,586 0.00, Inf >0.9
Low 372,735,433 0.00, Inf >0.9
1 HR = Hazard Ratio, CI = Confidence Interval
# The quantity of interest from a Cox regression model is a hazard ratio (HR). The HR represents the ratio of hazards between two groups at any particular point in time. The HR is interpreted as the instantaneous rate of occurrence of the event of interest in those who are still at risk for the event. It is not a risk, though it is commonly mis-interpreted as such. If you have a regression parameter β, then HR = exp(β).
# 
# A HR < 1 indicates reduced hazard of death whereas a HR > 1 indicates an increased hazard of death.
# 
# So the HR = 0.59 implies that 0.59 times as many females are dying as males, at any given time. Stated differently, females have a significantly lower hazard of death than males in these data.

3.1.0.3 Deal with the no event in diverse issue

#https://stats.stackexchange.com/questions/124821/dealing-with-no-events-in-one-treatment-group-survival-analysis

##############
######## 1- Implements Firth's penalized maximum likelihood bias reduction method for Cox regression
##############
# https://bookdown.org/rwnahhas/RMPH/survival-separation.html
# library("coxphf")
# # COX representation wth Penalized regression
# s_pen_full <- coxphf::coxphf(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block, 
#             data = data_survival)
# s_pen_full
# summary(s_pen_full)
# 
# s_pen <- coxphf::coxphf(formula = survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
#             data = data_survival)
# s_pen
# summary(s_pen)

# s2 <- coxphf::coxphf(survival::Surv(Gen_eggs_extinct, status) ~ Block, 
#             data = data_survival)
# anova(s1, s2)


# # PENALIZED LRT 
# coxphftest( formula=survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block,
#             test=~Genetic_Diversity,  data=data_survival)
# 


##############
######## 2- Augmented Data: As in Scucz et al. 2017 - Ecology
##############
# str(data_survival)
# data_survival_augmented <- rbind(data_survival, 
#                                  c("HighX", "Block3", "High", "6", "extinct", "1"))
# str(data_survival)
# str(data_survival_augmented)
# data_survival_augmented$status <- as.numeric(data_survival_augmented$status)
# data_survival_augmented$Gen_eggs_extinct <- as.numeric(data_survival_augmented$Gen_eggs_extinct)
# 
# 
# #With real data
# s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block, 
#                       data = data_survival)
# s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block, 
#                       data = data_survival)
# anova(s1, s2)
# 
# 
# #With augmented data
# s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block, 
#             data = data_survival_augmented)
# s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~  Block, 
#             data = data_survival_augmented)
# anova(s1, s2)
# 
# survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
#                 data = data_survival_augmented) %>% 
#   gtsummary::tbl_regression(exp = TRUE) 
# 
# s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity-1, 
#                       data = data_survival_augmented)
# sum_s1<-summary(s1)
# sum_s1
# confint(s1)
# 
# exp(sum_s1$coefficients)
# 
# survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
#                   data = data_survival_augmented)
# summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
#                 data = data_survival_augmented))
# 
# 


##############
######## 3- Consider only bottlenecked (LRT possible)
##############
data_survival_bottlenecked <- data_survival[data_survival$Genetic_Diversity!="High",]
data_survival_bottlenecked<-droplevels(data_survival_bottlenecked)

#With real data - But only bottlenecked 
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block, 
                      data = data_survival_bottlenecked)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block, 
                      data = data_survival_bottlenecked)
anova(s1, s2)
## Analysis of Deviance Table
##  Cox model: response is  survival::Surv(Gen_eggs_extinct, status)
##  Model 1: ~ Genetic_Diversity + Block
##  Model 2: ~ Block
##    loglik  Chisq Df Pr(>|Chi|)
## 1 -43.520                     
## 2 -43.574 0.1084  1      0.742
survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
                data = data_survival_bottlenecked) %>% 
  gtsummary::tbl_regression(exp = TRUE) 
Characteristic HR1 95% CI1 p-value
Genetic_Diversity
Medium — —
Low 1.07 0.35, 3.28 >0.9
1 HR = Hazard Ratio, CI = Confidence Interval
##############
######## Good- Comparison without estimates
##############
#Log Rank Test in R, the most frequent technique to compare survival curves between two groups is to use a log-rank test.

# Test hypotheses:
# Ho: In terms of survivability, there is no difference between the two groups.
# Hi: There is a survival differential between the two groups.
# We can reject the null hypothesis and infer that there is enough evidence to claim there is a difference in survival between the two groups if the p-value of the test is less than 0.05 (95% confidence level).


#Comparison 
survival::survdiff(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
                   data = data_survival)
## Call:
## survival::survdiff(formula = survival::Surv(Gen_eggs_extinct, 
##     status) ~ Genetic_Diversity, data = data_survival)
## 
##                           N Observed Expected (O-E)^2/E (O-E)^2/V
## Genetic_Diversity=High   27        0     4.41     4.405      7.00
## Genetic_Diversity=Medium 23        5     3.44     0.707      1.01
## Genetic_Diversity=Low    34        8     5.15     1.571      2.73
## 
##  Chisq= 7  on 2 degrees of freedom, p= 0.03
#The Chi-Squared test statistic is 7 with 2 degree of freedom and the corresponding p-value is 0.03. 
#Since this p-value is less than 0.05, we reject the null hypothesis.
#In other words, we have sufficient evidence to say that there is a statistically significant difference in survival between the three groups.

# survfit(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
# summary(survfit(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival))
# 
# 

3.1.0.4 Other representation

### OTHER REPRESENTATION :  
s2 <- survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, 
                        data = data_survival)
summary(s2)
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 
##     Genetic_Diversity, data = data_survival)
## 
##                 Genetic_Diversity=High 
##      time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 
##                 Genetic_Diversity=Medium 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     4     23       2    0.913  0.0588        0.805        1.000
##     5     21       2    0.826  0.0790        0.685        0.996
##     6     19       1    0.783  0.0860        0.631        0.971
## 
##                 Genetic_Diversity=Low 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     4     34       2    0.941  0.0404        0.865        1.000
##     5     32       4    0.824  0.0654        0.705        0.962
##     6     28       2    0.765  0.0727        0.635        0.921
print(s2)
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 
##     Genetic_Diversity, data = data_survival)
## 
##                           n events median 0.95LCL 0.95UCL
## Genetic_Diversity=High   27      0     NA      NA      NA
## Genetic_Diversity=Medium 23      5     NA      NA      NA
## Genetic_Diversity=Low    34      8     NA      NA      NA
# Access to the sort summary table
summary(s2)$table
##                          records n.max n.start events    rmean  se(rmean)
## Genetic_Diversity=High        27    27      27      0 6.000000 0.00000000
## Genetic_Diversity=Medium      23    23      23      5 5.739130 0.12627260
## Genetic_Diversity=Low         34    34      34      8 5.764706 0.09355367
##                          median 0.95LCL 0.95UCL
## Genetic_Diversity=High       NA      NA      NA
## Genetic_Diversity=Medium     NA      NA      NA
## Genetic_Diversity=Low        NA      NA      NA
d <- data.frame(time = s2$time,
                n.risk = s2$n.risk,
                n.event = s2$n.event,
                n.censor = s2$n.censor,
                surv = s2$surv,
                upper = s2$upper,
                lower = s2$lower)
head(d)
##   time n.risk n.event n.censor      surv     upper     lower
## 1    6     27       0       27 1.0000000 1.0000000 1.0000000
## 2    4     23       2        0 0.9130435 1.0000000 0.8048548
## 3    5     21       2        0 0.8260870 0.9964666 0.6848395
## 4    6     19       1       18 0.7826087 0.9707088 0.6309579
## 5    4     34       2        0 0.9411765 1.0000000 0.8653187
## 6    5     32       4        0 0.8235294 0.9621763 0.7048611
plot2 <- survminer::ggsurvplot(s2,
                               pval = TRUE,             # show p-value of log-rank test.
                               #pval.coord  = c(1,0.2), 
                               pval.size = 5, 
                               conf.int = TRUE,         # show confidence intervals for point estimaes of survival curves.
                               # conf.int.style = "step",  # customize style of confidence intervals
                               conf.int.style = "ribbon",  # customize style of confidence intervals 
                               conf.int.alpha = 0.3,  # customize style of confidence intervals
                               censor = TRUE, 
                               censor.shape = "x", 
                               censor.size = 6, 
                               xlab = "Generation",   # customize X axis label.
                               break.time.by = 1,     # break X axis in time intervals by 1
                               ggtheme = theme_light(), # customize plot and risk table with a theme.
                               #risk.table = TRUE, # Add risk table
                               #linetype = "strata", # Change line type by groups
                               risk.table = "abs_pct",  # absolute number and percentage at risk.
                               risk.table.y.text.col = T,# colour risk table text annotations.
                               risk.table.y.text = FALSE,# show bars instead of names in text annotations in legend of risk table.
                               risk.table.col = "strata", # Change risk table color by groups
                               legend.labs = c("Diverse","Intermediate bottleneck","Strong bottleneck"),   
                               legend = c("right"),
                               legend.title = c("Demographic\nhistory:"),   
                               palette = c("#00AFBB", "#E7B800", "#FC4E07")) 



plot2

# 
# cowplot::save_plot(file = here::here("figures","Extinction_Survival_analysis_V2.pdf"),
#                    plot2, base_height = 10/cm(1),  base_width = 15/cm(1), dpi = 610)



# Fit a Cox proportional hazards model
# fit.coxph <- coxph(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
# ggforest(fit.coxph, data = data_survival)

3.2 Probability of extinction

############ 
###### Clean dataset
############ 
#Prepare clean dataset
data_proba_extinction <- data[data$Generation_Eggs==6,
                              c("Block","Population","Genetic_Diversity","Extinction_finalstatus")]
data_proba_extinction$Extinction_finalstatus <- as.factor(data_proba_extinction$Extinction_finalstatus)
data_proba_extinction$y <- ifelse(data_proba_extinction$Extinction_finalstatus == "yes", 1, 0)



############ 
###### Analysis
############ 
# #Analysis
# mod0 <- glm(y ~ Genetic_Diversity + Block,  
#       data = data_proba_extinction, family = binomial)
# 
# mod0 <- glm(y ~ Genetic_Diversity-1 + Block,  
#       data = data_proba_extinction, family = binomial)
# 
# 
# #But issue of convergence 
# plogis(confint(mod0))


# ############ 
# ###### Analysis
# ############ 
# 
# 
# summary(mod0)
# 
# 
# 
# #Test convergence 
# mod1 <- glm(y ~ Genetic_Diversity + Block ,  
#       data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",], 
#       family = binomial)
# 
# #Test genetic diversity effect
# mod0 <- glm(y ~ Block ,  
#       data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",], family = binomial)
# anova(mod0, mod1, test = "Chisq")
# #We remove genetic diversity 
# 
# # Perform the lrtest 
# (A <- logLik(mod1))
# (B <- logLik(mod0))
# #Since the logLik() function gives more information than the numeric value, use the as.numeric() function to isolate the numeric value
# (teststat <- -2 * (as.numeric(A)-as.numeric(B)))
# #df = 5 - 3 = 2
# (p.val <- pchisq(teststat, df = 2, lower.tail = FALSE))
# 
# lmtest::lrtest(mod1, mod0)




############ 
###### CONFIDENCE INTERVAL 
############
# mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,  
#       data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",], family = binomial)
# summary(mod0)
# 
# #Extract probability of extinction 
# #(p_high <- plogis(-21.3144))
# (p_med <- plogis(-3.0142))
# (p_low <- plogis(-2.8082))
# 
# 
# # Confidence interval
# table_raw <- confint(mod0)
# table_ci <- plogis(table_raw)
# table_ci
# 
# ############ 
# ###### Predicted value
# ############ 
# 
# 
# data_predict_extinct <- data.frame(Genetic_Diversity = levels(data_proba_extinction$Genetic_Diversity),
#                                    Block = "Block4")
# 
# data_predict_extinct$predict <- predict(mod0, type = "response",
#         re.form = NA,
#         newdata = data_predict_extinct)
#         
# data_predict_extinct
# p_high
# p_med
# p_low
# 
# 
# ############ 
# ###### Posthoc
# ############ 
# emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
# 






############## 
######## 1- REAL DATA: issue with high no extinction 
##############

mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,
      data = data_proba_extinction, family = binomial)
summary(mod0)
## 
## Call:
## glm(formula = y ~ Genetic_Diversity - 1 + Block, family = binomial, 
##     data = data_proba_extinction)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.26011  -0.44258  -0.30957  -0.00005   2.47474  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)   
## Genetic_DiversityHigh    -21.3144  1855.7435  -0.011  0.99084   
## Genetic_DiversityMedium   -3.0142     1.1293  -2.669  0.00760 **
## Genetic_DiversityLow      -2.8082     1.0659  -2.635  0.00843 **
## BlockBlock4                0.7402     1.2714   0.582  0.56046   
## BlockBlock5                3.0006     1.1265   2.664  0.00773 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 116.449  on 84  degrees of freedom
## Residual deviance:  46.833  on 79  degrees of freedom
## AIC: 56.833
## 
## Number of Fisher Scoring iterations: 18
(p_high <- plogis(-21.3144))
## [1] 5.536989e-10
(p_med <- plogis(-3.0142))
## [1] 0.04678847
(p_low <- plogis(-2.8082))
## [1] 0.05688267
# Confidence interval: high [0,1]
plogis(confint(mod0))
##                                 2.5 %    97.5 %
## Genetic_DiversityHigh   1.689103e-236 1.0000000
## Genetic_DiversityMedium  2.415647e-03 0.2298311
## Genetic_DiversityLow     3.204498e-03 0.2431697
## BlockBlock4              1.553712e-01 0.9794348
## BlockBlock5              7.579725e-01 0.9975064
## ISSUE: DOESNT CONVERGE



mod0 <- glm(y ~ Genetic_Diversity + Block ,
      data = data_proba_extinction, family = binomial)
mod1 <- glm(y ~ Block ,
      data = data_proba_extinction, family = binomial)
anova(mod1,mod0,test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1        81     58.903                        
## 2        79     46.833  2   12.069 0.002394 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##############
######## 2- Augmented Data: As in Scucz et al. 2017 - Ecology
##############
# data_proba_extinction_augmented <- rbind(data_proba_extinction,
#                                          # c("Block3","HighX","High","no",0),
#                                          # c("Block3","LowX","Low","no",0),
#                                          # c("Block3","MedX","Medium","no",0),
#                                          #                                                                                      c("Block3","LowY","Low","yes",1),
# #                                         c("Block3","MedXY","Medium","yes",1),
#                                          c("Block3","HighY","High","yes",1))
data_proba_extinction_augmented <- rbind(data_proba_extinction,
                                         c("Block3","HighY","High","yes",1))
data_proba_extinction_augmented$y <- as.numeric(data_proba_extinction_augmented$y)

mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,
      data = data_proba_extinction_augmented, family = binomial)
summary(mod0)
## 
## Call:
## glm(formula = y ~ Genetic_Diversity - 1 + Block, family = binomial, 
##     data = data_proba_extinction_augmented)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1885  -0.4686  -0.4349  -0.1538   2.9692  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## Genetic_DiversityHigh   -4.39594    1.28253  -3.428 0.000609 ***
## Genetic_DiversityMedium -2.31099    0.86770  -2.663 0.007737 ** 
## Genetic_DiversityLow    -2.11810    0.79597  -2.661 0.007790 ** 
## BlockBlock4             -0.03575    1.05172  -0.034 0.972886    
## BlockBlock5              2.14410    0.86309   2.484 0.012984 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 117.835  on 85  degrees of freedom
## Residual deviance:  58.406  on 80  degrees of freedom
## AIC: 68.406
## 
## Number of Fisher Scoring iterations: 6
#Real data
(p_high <- plogis(-21.3144))
## [1] 5.536989e-10
(p_med <- plogis(-3.0142))
## [1] 0.04678847
(p_low <- plogis(-2.8082))
## [1] 0.05688267
#Augmented data
(p_high <- plogis(-3.9197))
## [1] 0.01946081
(p_med <- plogis(-1.6788))
## [1] 0.1572544
(p_low <- plogis(-1.5627))
## [1] 0.1732596
#Augmented data with only extinction
(p_high <- plogis(-4.39594))
## [1] 0.01217718
(p_med <- plogis(-2.31099))
## [1] 0.09021685
(p_low <- plogis(-2.11810))
## [1] 0.10735
# Confidence interval: high [0,1]
plogis(confint(mod0))
##                                2.5 %     97.5 %
## Genetic_DiversityHigh   0.0004918074 0.09139237
## Genetic_DiversityMedium 0.0127711273 0.30480717
## Genetic_DiversityLow    0.0174349180 0.31771819
## BlockBlock4             0.0963225771 0.89727408
## BlockBlock5             0.6478312906 0.98431182
mod0 <- glm(y ~ Genetic_Diversity + Block ,
      data = data_proba_extinction_augmented, family = binomial)
mod1 <- glm(y ~ Block ,
      data = data_proba_extinction_augmented, family = binomial)
anova(mod1,mod0,test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1        82     64.685                       
## 2        80     58.406  2   6.2787  0.04331 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############## 
######## 3- PenaliZed  Logistic Regression data 
############## 


fit<-logistf::logistf(y ~ Genetic_Diversity-1 + Block, 
                      data=data_proba_extinction)
summary(fit)
## logistf::logistf(formula = y ~ Genetic_Diversity - 1 + Block, 
##     data = data_proba_extinction)
## 
## Model fitted by Penalized ML
## Coefficients:
##                               coef  se(coef)  lower 0.95 upper 0.95      Chisq
## Genetic_DiversityHigh   -5.5403989 1.6381117 -10.6142028 -2.9271591 31.0874311
## Genetic_DiversityMedium -2.5593086 0.9220804  -4.9074011 -0.9845788 12.1477934
## Genetic_DiversityLow    -2.3929983 0.8625221  -4.6407993 -0.9400804 12.5897375
## BlockBlock4              0.5511409 1.0469817  -1.5552881  3.0070956  0.2677213
## BlockBlock5              2.5551098 0.9273661   0.9131677  4.8771647 10.2850062
##                                    p method
## Genetic_DiversityHigh   2.466634e-08      2
## Genetic_DiversityMedium 4.914598e-04      2
## Genetic_DiversityLow    3.878706e-04      2
## BlockBlock4             6.048644e-01      2
## BlockBlock5             1.341156e-03      2
## 
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
## 
## Likelihood ratio test=29.25035 on 5 df, p=2.070724e-05, n=84
## Wald test = 23.62508 on 5 df, p = 0.0002562504
nobs(fit)
## [1] 84
# Test genetic diversity effect
mod0<-logistf::logistf(y ~ Genetic_Diversity + Block, 
                      data=data_proba_extinction)
drop1(mod0)
##                       ChiSq df     P-value
## Genetic_Diversity  9.394033  2 0.009122452
## Block             12.698693  2 0.001747889
extractAIC(mod0)
##              null 
##  4.00000 29.29027
mod0<-logistf::logistf(y ~ Genetic_Diversity + Block, 
                      data=data_proba_extinction )
mod1<-logistf::logistf(y ~ Block, 
                      data=data_proba_extinction)
anova(mod1,mod0)
## Comparison of logistf models:
##                         Formula ChiSquared 
## 1 y ~ Genetic_Diversity + Block   21.29027 
## 2                     y ~ Block   11.89623 
## 
## Method:  nested 
## Chi-Squared:  9.394033   df= 2   P= 0.009122452
# logLik(mod1)
#lmtest::lrtest(mod1,mod0)



### Estimates and CI 
fit<-logistf::logistf(y ~ Genetic_Diversity-1 + Block, 
                      data=data_proba_extinction)
sum_p_extinct <- summary(fit)
## logistf::logistf(formula = y ~ Genetic_Diversity - 1 + Block, 
##     data = data_proba_extinction)
## 
## Model fitted by Penalized ML
## Coefficients:
##                               coef  se(coef)  lower 0.95 upper 0.95      Chisq
## Genetic_DiversityHigh   -5.5403989 1.6381117 -10.6142028 -2.9271591 31.0874311
## Genetic_DiversityMedium -2.5593086 0.9220804  -4.9074011 -0.9845788 12.1477934
## Genetic_DiversityLow    -2.3929983 0.8625221  -4.6407993 -0.9400804 12.5897375
## BlockBlock4              0.5511409 1.0469817  -1.5552881  3.0070956  0.2677213
## BlockBlock5              2.5551098 0.9273661   0.9131677  4.8771647 10.2850062
##                                    p method
## Genetic_DiversityHigh   2.466634e-08      2
## Genetic_DiversityMedium 4.914598e-04      2
## Genetic_DiversityLow    3.878706e-04      2
## BlockBlock4             6.048644e-01      2
## BlockBlock5             1.341156e-03      2
## 
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
## 
## Likelihood ratio test=29.25035 on 5 df, p=2.070724e-05, n=84
## Wald test = 23.62508 on 5 df, p = 0.0002562504
# Confidence interval:
plogis(sum_p_extinct$coefficients)
##   Genetic_DiversityHigh Genetic_DiversityMedium    Genetic_DiversityLow 
##             0.003909616             0.071803609             0.083708170 
##             BlockBlock4             BlockBlock5 
##             0.634400249             0.927916044
plogis(sum_p_extinct$ci.lower)
##   Genetic_DiversityHigh Genetic_DiversityMedium    Genetic_DiversityLow 
##            2.456403e-05            7.337438e-03            9.557749e-03 
##             BlockBlock4             BlockBlock5 
##            1.743238e-01            7.136479e-01
plogis(sum_p_extinct$ci.upper)
##   Genetic_DiversityHigh Genetic_DiversityMedium    Genetic_DiversityLow 
##              0.05082721              0.27198420              0.28088410 
##             BlockBlock4             BlockBlock5 
##              0.95289365              0.99243902
data.frame(coef = plogis(sum_p_extinct$coefficients), 
           ci_low = plogis(sum_p_extinct$ci.lower), 
           ci_upper = plogis(sum_p_extinct$ci.upper))
##                                coef       ci_low   ci_upper
## Genetic_DiversityHigh   0.003909616 2.456403e-05 0.05082721
## Genetic_DiversityMedium 0.071803609 7.337438e-03 0.27198420
## Genetic_DiversityLow    0.083708170 9.557749e-03 0.28088410
## BlockBlock4             0.634400249 1.743238e-01 0.95289365
## BlockBlock5             0.927916044 7.136479e-01 0.99243902
# Probability 
(p_high <- plogis(-5.5403989))
## [1] 0.003909616
(p_med <- plogis(-2.5593086))
## [1] 0.07180361
(p_low <- plogis(-2.3929983))
## [1] 0.08370817
plogis(confint(fit))
##                            Lower 95%  Upper 95%
## Genetic_DiversityHigh   2.456403e-05 0.05082721
## Genetic_DiversityMedium 7.337438e-03 0.27198420
## Genetic_DiversityLow    9.557749e-03 0.28088410
## BlockBlock4             1.743238e-01 0.95289365
## BlockBlock5             7.136479e-01 0.99243902
#plogis(sum_p_extinct)

3.3 Time to extinction

data_timeextinction<-data[data$First_Throw=="extinct",]
data_timeextinction <- data_timeextinction[complete.cases(data_timeextinction$Nb_adults), ]
data_timeextinction
##     Population  Block         Old_Label                  Label
## 207      Low16 Block4             B4-O1 8/18 BE B4 | G6 Low 16
## 274      Low27 Block5             B5-B1 7/21 BE B5 | G5 Low 27
## 277      Low28 Block5             B5-E1 6/16 BE B5 | G4 Low 28
## 296      Low30 Block5             B5-D1 7/21 BE B5 | G5 Low 30
## 301      Low31 Block5         B(2)5-P1  7/21 BE B5 | G5 Low 31
## 309      Low32 Block5         B(2)5-Q1  6/16 BE B5 | G4 Low 32
## 318      Low33 Block5         B(2)5-T1  8/25 BE B5 | G6 Low 33
## 335      Low36 Block5         B(2)5-X1  7/21 BE B5 | G5 Low 36
## 397      Med14 Block4   B4-Backup Fam L 7/14 BE B4 | G5 Med 14
## 410      Med17 Block5 B5 - Backup Fam F 6/16 BE B5 | G4 Med 17
## 438      Med20 Block5 B5 - Backup Fam I 7/21 BE B5 | G5 Med 20
## 448      Med22 Block5   B5-Backup Fam B 6/16 BE B5 | G4 Med 22
## 476       Med5 Block3 B3 - Backup Fam E  8/11 BE B3 | G6 Med 5
##     Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 207               Low                  5               6     8/18/21
## 274               Low                  4               5     7/21/21
## 277               Low                  3               4     6/16/21
## 296               Low                  4               5     7/21/21
## 301               Low                  4               5     7/21/21
## 309               Low                  3               4     6/16/21
## 318               Low                  5               6     8/25/21
## 335               Low                  4               5     7/21/21
## 397            Medium                  4               5     7/14/21
## 410            Medium                  3               4     6/16/21
## 438            Medium                  4               5     7/21/21
## 448            Medium                  3               4     6/16/21
## 476            Medium                  5               6     8/11/21
##     Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2 MC_Box1 MC_Box2
## 207         8/18/21       8/19/21       <NA>       <NA>      NA      NA
## 274         7/21/21       7/22/21       <NA>       <NA>       0       0
## 277         6/16/21       6/17/21       <NA>       <NA>       0       0
## 296         7/21/21       7/22/21       <NA>       <NA>       0       0
## 301         7/21/21       7/22/21       <NA>       <NA>       0       0
## 309         6/16/21       6/17/21       <NA>       <NA>       0       0
## 318         8/25/21       8/26/21       <NA>       <NA>      NA      NA
## 335         7/21/21       7/22/21       <NA>       <NA>       0       0
## 397         7/14/21       7/15/21       <NA>       <NA>       0       0
## 410         6/16/21       6/17/21       <NA>       <NA>       0       0
## 438         7/21/21       7/22/21       <NA>       <NA>       0       0
## 448         6/16/21       6/17/21       <NA>       <NA>       0       0
## 476         8/11/21       8/12/21       <NA>       <NA>       0       0
##     MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 207               0       0       0               0          2           0
## 274               0       0       0               0          3           0
## 277               0       0       0               0          2           0
## 296               0       0       0               0          1           0
## 301               0       0       0               0          2           0
## 309               0       0       0               0        127           0
## 318               0      NA      NA               0          2           0
## 335               0       0       0               0          1           0
## 397               0      NA      NA               0          8          NA
## 410               0       0       0               0          5           0
## 438               0       0       0               0          1           0
## 448               0       0       0               0         16           0
## 476               0      NA      NA               0          1           0
##     First_Throw Extinction_finalstatus Less_than_5 Nb_adults Lambda obs
## 207     extinct                    yes         yes         0      0 462
## 274     extinct                    yes         yes         0      0 403
## 277     extinct                    yes         yes         0      0 320
## 296     extinct                    yes         yes         0      0 406
## 301     extinct                    yes         yes         0      0 407
## 309     extinct                    yes         yes         0      0 324
## 318     extinct                    yes         yes         0      0 493
## 335     extinct                    yes         yes         0      0 412
## 397     extinct                    yes         yes         0      0 392
## 410     extinct                    yes         yes         0      0 329
## 438     extinct                    yes         yes         0      0 416
## 448     extinct                    yes         yes         0      0 334
## 476     extinct                    yes         yes         0      0 443
##     gen_square  He_start He_lost_gen_t He_remain    He_exp    He_end
## 207         36 0.5544299            NA        NA 0.6449276 0.3575672
## 274         25 0.5367998            NA        NA 0.8056432 0.4324691
## 277         16 0.5386585            NA        NA 0.7452523 0.4014365
## 296         25 0.5540444            NA        NA 0.4950540 0.2742819
## 301         25 0.5532775            NA        NA 0.7440450 0.4116634
## 309         16 0.5528880            NA        NA 0.9892872 0.5469651
## 318         36 0.5523333            NA        NA 0.6083089 0.3359893
## 335         25 0.5427371            NA        NA 0.4855518 0.2635269
## 397         25 0.6802331            NA        NA 0.9285014 0.6315974
## 410         16 0.6825509            NA        NA 0.8944237 0.6104897
## 438         25 0.6819650            NA        NA 0.4841994 0.3302071
## 448         16 0.6809168            NA        NA 0.9614965 0.6546991
## 476         36 0.6807065            NA        NA 0.4714889 0.3209456
##     ID_extinction
## 207        extant
## 274        extant
## 277        extant
## 296        extant
## 301        extant
## 309        extant
## 318        extant
## 335        extant
## 397        extant
## 410        extant
## 438        extant
## 448        extant
## 476        extant
tapply(data_timeextinction$Generation_Eggs,data_timeextinction$Genetic_Diversity,mean)
##   High Medium    Low 
##     NA    4.8    5.0
#Test time of extinction
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity + Block,
                    data = data_timeextinction, family = "poisson")
mod0 <- glm(Generation_Eggs ~  Block,
                    data = data_timeextinction, family = "poisson")

anova(mod0, mod1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        10    0.95459                     
## 2         9    0.74888  1  0.20571   0.6502
#We keep genetic diversity 

logLik(mod1)
## 'log Lik.' -22.83384 (df=4)
#Check for  overdispersion 
sqrt(deviance(mod1)/(nobs(mod1)-length(coef(mod1))))
## [1] 0.2884598
sigma(mod1)
## [1] 0.2884598
summary(mod1)
## 
## Call:
## glm(formula = Generation_Eggs ~ Genetic_Diversity + Block, family = "poisson", 
##     data = data_timeextinction)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.41035  -0.13936   0.05512   0.05512   0.49031  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            1.7918     0.4082   4.389 1.14e-05 ***
## Genetic_DiversityLow   0.1295     0.2877   0.450    0.653    
## BlockBlock4           -0.1539     0.5301  -0.290    0.772    
## BlockBlock5           -0.3366     0.4813  -0.699    0.484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1.40752  on 12  degrees of freedom
## Residual deviance: 0.74888  on  9  degrees of freedom
## AIC: 53.668
## 
## Number of Fisher Scoring iterations: 4
#Residual deviance / df
summary(mod1)
## 
## Call:
## glm(formula = Generation_Eggs ~ Genetic_Diversity + Block, family = "poisson", 
##     data = data_timeextinction)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.41035  -0.13936   0.05512   0.05512   0.49031  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            1.7918     0.4082   4.389 1.14e-05 ***
## Genetic_DiversityLow   0.1295     0.2877   0.450    0.653    
## BlockBlock4           -0.1539     0.5301  -0.290    0.772    
## BlockBlock5           -0.3366     0.4813  -0.699    0.484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1.40752  on 12  degrees of freedom
## Residual deviance: 0.74888  on  9  degrees of freedom
## AIC: 53.668
## 
## Number of Fisher Scoring iterations: 4
0.74888/9
## [1] 0.08320889
AER::dispersiontest(mod1)
## 
##  Overdispersion test
## 
## data:  mod1
## z = -25.921, p-value = 1
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
## 0.05772897
# # Add random obs effect 
# data_dyn$obs<-as.factor(1:nrow(data_dyn))



# Perform the lrtest 
(A <- logLik(mod1))
## 'log Lik.' -22.83384 (df=4)
(B <- logLik(mod0))
## 'log Lik.' -22.9367 (df=3)
#attr(logLik(mod1), "df")-attr(logLik(mod0), "df") 


#Since the logLik() function gives more information than the numeric value, use the as.numeric() function to isolate the numeric value
(teststat <- -2 * (as.numeric(A)-as.numeric(B)))
## [1] -0.2057062
#df = 5 - 3 = 2
(p.val <- pchisq(teststat, df = 2, lower.tail = FALSE))
## [1] 1
lmtest::lrtest(mod0,mod1)
## Likelihood ratio test
## 
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   3 -22.937                     
## 2   4 -22.834  1 0.2057     0.6502
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity-1 + Block,
                    data = data_timeextinction, family = "poisson")
sum_mod1 <- summary(mod1)
datatransformed <- confint(mod1)
exp(datatransformed)
##                             2.5 %    97.5 %
## Genetic_DiversityMedium 2.3845707 12.157137
## Genetic_DiversityLow    2.3627659 17.279049
## BlockBlock4             0.3099881  2.578667
## BlockBlock5             0.2925632  1.999911
exp(sum_mod1$coefficients)
##                          Estimate Std. Error    z value Pr(>|z|)
## Genetic_DiversityMedium 6.0000000   1.504181 80.5514766 1.000011
## Genetic_DiversityLow    6.8296508   1.647836 46.8373186 1.000120
## BlockBlock4             0.8573889   1.699154  0.7480860 2.163300
## BlockBlock5             0.7142037   1.618161  0.4969116 1.623100
exp(1.568)
## [1] 4.797045
exp(1.6094)
## [1] 4.99981

3.4 Population size over time

3.4.1 GAMMM Model

3.4.1.1 Basic model

# https://jacolienvanrij.com/Tutorials/GAMM.html
# install.packages("itsadug")
# packageVersion('mgcv')
# packageVersion('itsadug')

data_dyn <- data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",]
str(data_dyn)
## 'data.frame':    355 obs. of  33 variables:
##  $ Population            : Factor w/ 84 levels "High1","High13",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Block                 : Factor w/ 3 levels "Block3","Block4",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Old_Label             : chr  "B3 All red mix " "B3 All red mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
##  $ Label                 : chr  "7/7 BE B3 | G5 High 1" "6/2 BE B3 | G4 High 1" "4/28 BE B3 | G3 High 1" "BE B3 3/24 | G2 High1" ...
##  $ Genetic_Diversity     : Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Generation_Parents    : int  4 3 2 1 5 5 3 2 1 4 ...
##  $ Generation_Eggs       : int  5 4 3 2 6 6 4 3 2 5 ...
##  $ Date_Census           : chr  "7/7/21" "6/2/21" "4/28/21" "3/24/21" ...
##  $ Date_Start_Eggs       : chr  "7/7/21" "6/2/21" "4/28/21" "3/24/21" ...
##  $ Date_End_Eggs         : chr  "7/8/21" "6/3/21" "4/29/21" "3/25/21" ...
##  $ Image_Box1            : chr  "DSC_0779" "DSC_0206" "DSC_0585" "DSC_0920" ...
##  $ Image_Box2            : chr  "DSC_0780" "DSC_0207" "DSC_0586" "DSC_0921" ...
##  $ MC_Box1               : num  26.6 85.4 82.3 266.9 100.1 ...
##  $ MC_Box2               : num  5.15 90.72 78.01 149.8 74.38 ...
##  $ MC_Total_Adults       : num  31.8 176.1 160.3 416.7 174.5 ...
##  $ HC_Box1               : int  27 79 83 253 64 47 71 60 101 47 ...
##  $ HC_Box2               : int  4 96 82 135 58 80 118 113 214 44 ...
##  $ HC_Total_Adults       : int  31 175 165 388 122 127 189 173 315 91 ...
##  $ Nb_parents            : num  175 165 388 100 31 91 173 315 100 189 ...
##  $ Growth_rate           : num  0.177 1.061 0.425 3.88 3.935 ...
##  $ First_Throw           : chr  "no" "no" "no" "no" ...
##  $ Extinction_finalstatus: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Less_than_5           : chr  "no" "no" "no" "no" ...
##  $ Nb_adults             : int  31 175 165 388 122 127 189 173 315 91 ...
##  $ Lambda                : num  0.177 1.061 0.425 3.88 3.935 ...
##  $ obs                   : Factor w/ 504 levels "1","2","3","4",..: 337 253 169 85 421 447 279 195 111 363 ...
##  $ gen_square            : num  25 16 9 4 36 36 16 9 4 25 ...
##  $ He_start              : num  0.999 0.999 0.999 0.999 0.999 ...
##  $ He_lost_gen_t         : num  0.984 0.997 0.997 0.999 0.996 ...
##  $ He_remain             : num  0.971 0.986 0.989 0.992 0.967 ...
##  $ He_exp                : num  0.968 0.968 0.968 0.968 0.968 ...
##  $ He_end                : num  0.967 0.967 0.967 0.967 0.967 ...
##  $ ID_extinction         : chr  "extant" "extant" "extant" "extant" ...
# 1-dimensional smooth of Time:
#m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs), data=data_dyn)
#Meaning the GAM has failed to construct a sensible smooth term for this data. You can limit the maximum df of the smooth using the k parameter:
m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, k=3), family = "poisson", data=data_dyn)

#number of ‘knots’. This parameter determines the upper bound of the number of underlying base functions being used to build up the curve. Thus, this parameter constraints the wigglyness of a smooth, or - as a metaphor - the number of bowpoints of a curve.
#Note that the model will base the number of base functions (reflected in the edf of the summary) on the data with the setting for k as upper bound. By default, the value of k for s() is around 9, and for te() and ti() 5 per dimension. Importantly, the value of k should be at most one less than the number of unique data points, otherwise it will fit the density of that predictor.

# interaction with non-isotropicterms:
#m2 <- gam(Nb_adults ~ te(Generation_Eggs, Genetic_Diversity), data=data_dyn)

# decompose the same interaction in main effects + interaction:
# (note: include main effects)
# m3 <- gam(Nb_adults ~ s(Generation_Eggs) + s(Genetic_Diversity) +
#           + ti(Generation_Eggs, Genetic_Diversity), 
#           data=data_dyn)

# s() : for modeling a 1-dimensional smooth, or for modeling isotropic interactions (variables are measured in same units and on same scale)
# te(): for modeling 2- or n-dimensional interaction surfaces of variables that are not isotropic (but see info about d parameter below). Includes ‘main’ effects.
# ti(): for modeling 2- or n-dimensional interaction surfaces that do not include the ‘main effects

########### 
########### ADD FACTOR EFFECT: 
########### 

# How to set up the interaction depends on the type of grouping predictor:
# with factor include intercept difference: Group + s(Time, by=Group).
# with ordered factor include intercept difference and reference smooth: Group + s(Time) + s(Time, by=Group).
# with binary predictor include reference smooth: s(Time) + s(Time, by=IsGroupChildren).

m.factor <- mgcv::gam(Nb_adults ~ Genetic_Diversity +  s(Generation_Eggs, k=3) + 
            s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)

summary(m.factor)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity + s(Generation_Eggs, k = 3) + s(Generation_Eggs, 
##     by = Genetic_Diversity, k = 3)
## 
## Parametric coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.048711   0.007184  702.75   <2e-16 ***
## Genetic_DiversityMedium -0.364241   0.012871  -28.30   <2e-16 ***
## Genetic_DiversityLow    -0.448867   0.011778  -38.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                edf  Ref.df  Chi.sq p-value    
## s(Generation_Eggs)                         1.96929 1.99358 3669.85  <2e-16 ***
## s(Generation_Eggs):Genetic_DiversityHigh   1.92775 1.98689  200.26  <2e-16 ***
## s(Generation_Eggs):Genetic_DiversityMedium 0.01232 0.01357    0.00   0.991    
## s(Generation_Eggs):Genetic_DiversityLow    1.96596 1.99514   83.68  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 10/11
## R-sq.(adj) =  0.631   Deviance explained = 57.3%
## UBRE = 32.587  Scale est. = 1         n = 353
#The smooth terms summary shows three smooths that are significantly different from 0. 
#We cannot conclude from the summary that the two curves are different from each other.


m.factor <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=3) + 
            s(Generation_Eggs, by= Genetic_Diversity, k=3),family = "poisson",  data=data_dyn)
summary(m.factor)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 3) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 3)
## 
## Parametric coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.112244   0.009925 515.080  < 2e-16 ***
## Genetic_DiversityMedium -0.371509   0.012939 -28.713  < 2e-16 ***
## Genetic_DiversityLow    -0.471075   0.011911 -39.549  < 2e-16 ***
## BlockBlock4             -0.043703   0.010311  -4.239 2.25e-05 ***
## BlockBlock5             -0.157407   0.012266 -12.833  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                               edf Ref.df  Chi.sq  p-value    
## s(Generation_Eggs)                         1.9693 1.9935 2912.46  < 2e-16 ***
## s(Generation_Eggs):Genetic_DiversityHigh   1.9286 1.9870  159.13  < 2e-16 ***
## s(Generation_Eggs):Genetic_DiversityMedium 1.0124 1.0137   17.21 3.07e-05 ***
## s(Generation_Eggs):Genetic_DiversityLow    0.9659 0.9951   79.53  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 12/13
## R-sq.(adj) =  0.623   Deviance explained =   58%
## UBRE = 32.112  Scale est. = 1         n = 353
########### 
########### ADD RANDOM EFFECT: 
########### 

# random intercepts adjust the height of other modelterms with a constant value: s(Subject, bs="re").
# random slopes adjust the slope ofthe trend of a numeric predictor: s(Subject, Time, bs="re").
# random smooths adjust the trend of a numeric predictor in a nonlinear way: s(Time, Subject, bs="fs", m=1).

m.mixed <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) + 
            s(Generation_Eggs, by= Genetic_Diversity, k=3) + 
             s(Population, bs="re") , family = "poisson", data=data_dyn)
summary(m.mixed)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 3) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 3) + s(Population, 
##     bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.09773    0.07801  65.347  < 2e-16 ***
## Genetic_DiversityMedium -0.39811    0.08934  -4.456 8.34e-06 ***
## Genetic_DiversityLow    -0.48810    0.08143  -5.994 2.05e-09 ***
## BlockBlock4             -0.03068    0.08006  -0.383   0.7016    
## BlockBlock5             -0.18745    0.09303  -2.015   0.0439 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                 edf   Ref.df  Chi.sq p-value
## s(Generation_Eggs)                          1.96948  1.99405 3715.94  <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh    1.92487  1.98702  200.33  <2e-16
## s(Generation_Eggs):Genetic_DiversityMedium  0.01073  0.01185    0.00   0.992
## s(Generation_Eggs):Genetic_DiversityLow     1.96684  1.99558   84.64  <2e-16
## s(Population)                              64.77416 66.00000 2603.25  <2e-16
##                                               
## s(Generation_Eggs)                         ***
## s(Generation_Eggs):Genetic_DiversityHigh   ***
## s(Generation_Eggs):Genetic_DiversityMedium    
## s(Generation_Eggs):Genetic_DiversityLow    ***
## s(Population)                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 83/84
## R-sq.(adj) =  0.642   Deviance explained =   68%
## UBRE = 24.585  Scale est. = 1         n = 353

3.4.1.2 Model selection

########### 
########### AIC
########### 
m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)
m2 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=4), family = "poisson", data=data_dyn)
m3 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m4 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)
m5 <- mgcv::gam(Nb_adults ~ Block+ 
                  s(Generation_Eggs, by= Genetic_Diversity, k=4), family = "poisson", data=data_dyn)
m6 <- mgcv::gam(Nb_adults ~ Block +  s(Generation_Eggs, k=3) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m7 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=3) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m8 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=3) + 
                   s(Population, bs="re"), family = "poisson", data=data_dyn)
m9 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=4) + 
                   s(Population, bs="re"), family = "poisson", data=data_dyn)
m10 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=5)+
                  s(Population, bs="re"), family = "poisson", data=data_dyn)
m11 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=3)+
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
m12 <- mgcv::gam(Nb_adults ~ Block +  
                  s(Generation_Eggs, by= Genetic_Diversity, k=4) + 
                    s(Population, bs="re"), family = "poisson",  data=data_dyn)
m13 <- mgcv::gam(Nb_adults ~ Block +  s(Generation_Eggs, k=3) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
m14 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=3) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
m15 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=5) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
m16 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=4) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=4) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
m17 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=3) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=3) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)

AIC(m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17)
##            df      AIC
## m1   6.994710 15820.24
## m2   9.763989 15822.89
## m3  12.061325 15790.92
## m4  10.994807 13955.58
## m5  11.764852 15759.43
## m6  12.729808 15726.51
## m7  16.175767 13915.73
## m8  76.248665 11298.95
## m9  79.061452 11296.89
## m10 81.484894 11253.32
## m11 75.770122 11298.48
## m12 79.073993 11296.91
## m13 80.625022 11252.52
## m14 80.103661 11252.00
## m15 80.271775 11251.94
## m16 77.938918 11295.61
## m17 75.646077 11298.34
#https://www.seascapemodels.org/rstats/2021/03/27/common-GAM-problems.html



# Real comparison 
mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=5) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod1 <- mgcv::gam(Nb_adults ~ Block +  s(Generation_Eggs, k=5) + 
                  s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod3 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  
                    s(Generation_Eggs, by= Genetic_Diversity, k=3) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod4 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  
                    s(Generation_Eggs, by= Genetic_Diversity, k=4) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod5 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  
                    s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod7 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=3) +
                    s(Generation_Eggs, by= Genetic_Diversity, k=3) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod8 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=4) +
                    s(Generation_Eggs, by= Genetic_Diversity, k=4) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
mod9 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)

AIC(mod1, mod2, mod3, mod4, mod5, mod7, mod8, mod9)
##            df      AIC
## mod1 80.86938 11252.52
## mod2 73.55589 11621.29
## mod3 75.77012 11298.48
## mod4 78.56136 11296.40
## mod5 80.96276 11252.79
## mod7 75.64608 11298.34
## mod8 77.93892 11295.61
## mod9 80.27178 11251.94
modA <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)
modB <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +  s(Generation_Eggs, k=5) + 
                    s(Population, bs="re"),family = "poisson",  data=data_dyn)
modC <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  + 
                    s(Generation_Eggs, by= Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), family = "poisson", data=data_dyn)


AIC(modA, modB, modC)
##            df      AIC
## modA 80.27178 11251.94
## modB 73.55589 11621.29
## modC 80.96276 11252.79
visreg::visreg(modA, xvar = "Generation_Eggs",
       by = "Genetic_Diversity", data = data_dyn,
       method = "REML")

visreg::visreg(modC, xvar = "Generation_Eggs",
       by = "Genetic_Diversity", data = data_dyn,
       method = "REML")

3.4.1.3 Checking overdisperson

mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), method="ML",  family = "poisson", link = log, data=data_dyn)

#Check overdispersion 
sqrt(deviance(mod0)/(nobs(mod0)-length(coef(mod0))))
## [1] 5.814783
sigma(mod0)
## [1] 5.814783
summary(mod0)
## 
## Family: poisson 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population, 
##     bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.09849    0.06780  75.195  < 2e-16 ***
## Genetic_DiversityMedium -0.39933    0.07771  -5.138 2.77e-07 ***
## Genetic_DiversityLow    -0.48975    0.07084  -6.913 4.74e-12 ***
## BlockBlock4             -0.03083    0.06959  -0.443   0.6578    
## BlockBlock5             -0.18652    0.08089  -2.306   0.0211 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                  edf    Ref.df  Chi.sq p-value
## s(Generation_Eggs)                         3.833e+00 3.954e+00 3742.51  <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh   3.859e+00 3.970e+00  236.75  <2e-16
## s(Generation_Eggs):Genetic_DiversityMedium 1.414e-04 1.671e-04    0.00   0.998
## s(Generation_Eggs):Genetic_DiversityLow    3.391e+00 3.756e+00   86.79  <2e-16
## s(Population)                              6.438e+01 6.600e+01 2593.63  <2e-16
##                                               
## s(Generation_Eggs)                         ***
## s(Generation_Eggs):Genetic_DiversityHigh   ***
## s(Generation_Eggs):Genetic_DiversityMedium    
## s(Generation_Eggs):Genetic_DiversityLow    ***
## s(Population)                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 91/92
## R-sq.(adj) =  0.638   Deviance explained = 68.2%
## -ML = 5739.3  Scale est. = 1         n = 353
# Add random obs effect 
data_dyn$obs<-as.factor(1:nrow(data_dyn))


mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), 
                    method="ML",  family = mgcv::nb(), link = log, data=data_dyn)

#Check overdispersion 
sqrt(deviance(mod0)/(nobs(mod0)-length(coef(mod0))))
## [1] 1.16428
sigma(mod0)
## [1] 1.16428
summary(mod0)
## 
## Family: Negative Binomial(2.957) 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population, 
##     bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.16902    0.08458  61.114  < 2e-16 ***
## Genetic_DiversityMedium -0.40435    0.09679  -4.178 2.94e-05 ***
## Genetic_DiversityLow    -0.51630    0.08833  -5.845 5.06e-09 ***
## BlockBlock4             -0.08511    0.08693  -0.979 0.327538    
## BlockBlock5             -0.33631    0.10114  -3.325 0.000884 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                  edf    Ref.df  Chi.sq p-value
## s(Generation_Eggs)                         3.371e+00 3.781e+00 164.635  <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh   1.000e+00 1.000e+00   5.508  0.0189
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05   0.000  0.9996
## s(Generation_Eggs):Genetic_DiversityLow    1.000e+00 1.001e+00   0.494  0.4823
## s(Population)                              1.988e+01 6.600e+01  30.347  0.0037
##                                               
## s(Generation_Eggs)                         ***
## s(Generation_Eggs):Genetic_DiversityHigh   *  
## s(Generation_Eggs):Genetic_DiversityMedium    
## s(Generation_Eggs):Genetic_DiversityLow       
## s(Population)                              ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 91/92
## R-sq.(adj) =  0.612   Deviance explained = 51.5%
## -ML = 1974.9  Scale est. = 1         n = 353

3.4.1.4 Testing for significance

# Three important methods to determine the significance of predictors:
## -1 Model-comparison procedure (using function compareML).
## -2 Test statistics of the smooth terms as presented in the summary.
## -3 Visualization of the smooth terms (e.g., difference plots and difference surfaces) but useful only with significative interaction https://jacolienvanrij.com/Tutorials/GAMM.html 


###### 1- Model comparison 
### Test interaction Generation*Genetic Diversity
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), method="ML",  family = mgcv::nb(),  data=data_dyn)
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +  s(Generation_Eggs, k=5) +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(),  method='ML')

itsadug::compareML( mod_2,mod_1,suggest.report = TRUE)
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Population, bs = "re")
## 
## mod_1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population, 
##     bs = "re")
## 
## Report suggestion: The Chi-Square test on the ML scores indicates that model mod_1 is [not significantly] better than model mod_2 (X2(6.00)=3.003, p>.1).
## -----
##   Model    Score Edf Difference    Df p.value Sig.
## 1 mod_2 1977.920   8                              
## 2 mod_1 1974.918  14      3.003 6.000   0.423     
## 
## AIC difference: 4.21, model mod_1 has lower AIC.
anova(mod_1, mod_2, test = "LRT")
## Analysis of Deviance Table
## 
## Model 1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population, 
##     bs = "re")
## Model 2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Population, bs = "re")
##   Resid. Df Resid. Dev      Df Deviance Pr(>Chi)  
## 1    304.92     3891.7                            
## 2    310.72     3905.0 -5.8082  -13.335  0.03402 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#CompareML is preferred over other functions such as AIC for models that include an AR1 model or random effects (especially nonlinear random smooths using bs='fs'). CompareML also reports the AIC difference, but that value should be treated with care.

#AIC(mod_1, mod_2)
# itsadug::gamtabs(mod_1)
# itsadug::gamtabs(mod_2, type="latex")
# summary(mod_1)


### Test effect Generation
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +
                    s(Generation_Eggs, k=5) +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
mod_3 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML(mod_3, mod_2)
## mod_3: Nb_adults ~ Genetic_Diversity + Block + s(Population, bs = "re")
## 
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Population, bs = "re")
## 
## Chi-square test of ML scores
## -----
##   Model    Score Edf Difference    Df  p.value Sig.
## 1 mod_3 2072.836   6                               
## 2 mod_2 1977.920   8     94.916 2.000  < 2e-16  ***
## 
## AIC difference: 192.23, model mod_2 has lower AIC.
### Test intercept Genetic Diversity
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +  
                    s(Generation_Eggs, k=5) +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(),   method='ML')
mod_3 <- mgcv::gam(Nb_adults ~ Block  +   
                    s(Generation_Eggs, k=5) +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML(mod_3, mod_2)
## mod_3: Nb_adults ~ Block + s(Generation_Eggs, k = 5) + s(Population, 
##     bs = "re")
## 
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Population, bs = "re")
## 
## Chi-square test of ML scores
## -----
##   Model    Score Edf Difference    Df   p.value Sig.
## 1 mod_3 1993.353   6                                
## 2 mod_2 1977.920   8     15.432 2.000 1.986e-07  ***
## 
## AIC difference: 9.55, model mod_2 has lower AIC.
logLik(mod_3)
## 'log Lik.' -1942.118 (df=45.38753)
logLik(mod_2)# 
## 'log Lik.' -1952.505 (df=30.22338)
mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +
                         s(Generation_Eggs, k=5) +
                         s(Population, bs="re"), data=data_dyn,  family = mgcv::nb(), method='ML')



###### 2- Summary of the model
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity-1 + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), method="ML",  family = mgcv::nb(),  data=data_dyn)
summary(mod_1)
## 
## Family: Negative Binomial(2.957) 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity - 1 + Block + s(Generation_Eggs, 
##     k = 5) + s(Generation_Eggs, by = Genetic_Diversity, k = 5) + 
##     s(Population, bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## Genetic_DiversityHigh    5.16902    0.08458  61.114  < 2e-16 ***
## Genetic_DiversityMedium  4.76467    0.08912  53.466  < 2e-16 ***
## Genetic_DiversityLow     4.65271    0.07734  60.161  < 2e-16 ***
## BlockBlock4             -0.08511    0.08693  -0.979 0.327538    
## BlockBlock5             -0.33631    0.10114  -3.325 0.000884 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                  edf    Ref.df   Chi.sq p-value
## s(Generation_Eggs)                         3.371e+00 3.781e+00  151.191 < 2e-16
## s(Generation_Eggs):Genetic_DiversityHigh   1.000e+00 1.000e+00    2.677 0.10182
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05 9672.819 < 2e-16
## s(Generation_Eggs):Genetic_DiversityLow    1.000e+00 1.001e+00    0.000 0.99943
## s(Population)                              1.988e+01 6.600e+01   30.349 0.00372
##                                               
## s(Generation_Eggs)                         ***
## s(Generation_Eggs):Genetic_DiversityHigh      
## s(Generation_Eggs):Genetic_DiversityMedium ***
## s(Generation_Eggs):Genetic_DiversityLow       
## s(Population)                              ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 91/92
## R-sq.(adj) =  0.612   Deviance explained = 51.5%
## -ML = 1974.9  Scale est. = 1         n = 353
mgcv::summary.gam(mod_1, )
## 
## Family: Negative Binomial(2.957) 
## Link function: log 
## 
## Formula:
## Nb_adults ~ Genetic_Diversity - 1 + Block + s(Generation_Eggs, 
##     k = 5) + s(Generation_Eggs, by = Genetic_Diversity, k = 5) + 
##     s(Population, bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## Genetic_DiversityHigh    5.16902    0.08458  61.114  < 2e-16 ***
## Genetic_DiversityMedium  4.76467    0.08912  53.466  < 2e-16 ***
## Genetic_DiversityLow     4.65271    0.07734  60.161  < 2e-16 ***
## BlockBlock4             -0.08511    0.08693  -0.979 0.327538    
## BlockBlock5             -0.33631    0.10114  -3.325 0.000884 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                  edf    Ref.df   Chi.sq p-value
## s(Generation_Eggs)                         3.371e+00 3.781e+00  151.191 < 2e-16
## s(Generation_Eggs):Genetic_DiversityHigh   1.000e+00 1.000e+00    2.677 0.10182
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05 9672.819 < 2e-16
## s(Generation_Eggs):Genetic_DiversityLow    1.000e+00 1.001e+00    0.000 0.99943
## s(Population)                              1.988e+01 6.600e+01   30.349 0.00372
##                                               
## s(Generation_Eggs)                         ***
## s(Generation_Eggs):Genetic_DiversityHigh      
## s(Generation_Eggs):Genetic_DiversityMedium ***
## s(Generation_Eggs):Genetic_DiversityLow       
## s(Population)                              ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 91/92
## R-sq.(adj) =  0.612   Deviance explained = 51.5%
## -ML = 1974.9  Scale est. = 1         n = 353
data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
                               Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
                                                       each=length(seq(2,6,0.01))),
                               Block = "Block4",
                               Population = rep(c("High1","Med1","Low1"),
                                                each=length(seq(2,6,0.01))))
data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
            exclude_terms = list("s(Population)", "Block"),
            values = list(Population = NULL, Block = NULL),
            type = "response")









###### Additional tests

# With factors such as Group we cannot use the summary to test differences between groups. However, to investigate differences between groups, we could use difference smooths with ordered factors or binary predictors.
data_dyn$OFGroup_diversity <- as.factor(data_dyn$Genetic_Diversity)
# change factor to ordered factor:
data_dyn$OFGroup_diversity <- as.ordered(data_dyn$OFGroup)
# change contrast to treatment coding (difference curves)
contrasts(data_dyn$OFGroup_diversity) <- 'contr.treatment'
# Inspect contrasts:
contrasts(data_dyn$OFGroup_diversity)
##        Medium Low
## High        0   0
## Medium      1   0
## Low         0   1
mod_1 <- mgcv::gam(Nb_adults ~ OFGroup_diversity + Block +  
                    s(Generation_Eggs, k=5) + 
                     s(Generation_Eggs, by = OFGroup_diversity, k=5) + 
                    s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)


summary(mod_1)
## 
## Family: Negative Binomial(2.957) 
## Link function: log 
## 
## Formula:
## Nb_adults ~ OFGroup_diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = OFGroup_diversity, k = 5) + s(Population, 
##     bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.16902    0.08458  61.114  < 2e-16 ***
## OFGroup_diversityMedium -0.40435    0.09679  -4.178 2.94e-05 ***
## OFGroup_diversityLow    -0.51630    0.08833  -5.845 5.06e-09 ***
## BlockBlock4             -0.08511    0.08693  -0.979 0.327535    
## BlockBlock5             -0.33631    0.10114  -3.325 0.000883 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                               edf Ref.df  Chi.sq p-value    
## s(Generation_Eggs)                          3.371  3.781 138.039 < 2e-16 ***
## s(Generation_Eggs):OFGroup_diversityMedium  1.001  1.002   5.503 0.01900 *  
## s(Generation_Eggs):OFGroup_diversityLow     1.000  1.000   3.278 0.07023 .  
## s(Population)                              19.879 66.000  30.350 0.00379 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.612   Deviance explained = 51.5%
## -ML = 1974.9  Scale est. = 1         n = 353
#We confirm: Interaction significative
# The summary now provides the significance of the difference terms: Medium and High does show a slightly significantly different trend over Time (F=5.50, edf=1, p=0.02), Low and High does not show significantly different trend over Time (F=19.8, edf=1, p=0.07). 


  
###### 2-  Visualization of the smooth terms
# seful only with significative interaction 
# Which is not the case here
# But see: https://jacolienvanrij.com/Tutorials/GAMM.html

3.4.1.5 Plot

data_dyn <- data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",]

mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + 
                    s(Generation_Eggs, k=5) + 
                    s(Population, bs="re"),  family = mgcv::nb(), data=data_dyn,  method='REML')

#Check transformation: OK  
# mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity  + 
#                     s(Generation_Eggs, k=5),  family = mgcv::nb(), data=data_dyn)
# 
# mod_A_p <- tidymv::get_gam_predictions(model = mod_final,
#                                        series = Generation_Eggs,
#                                        transform = exp)
# tapply(mod_A_p$Nb_adults,mod_A_p$Genetic_Diversity,min)
# 
# 
# data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
#                                Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
#                                                        each=length(seq(2,6,0.01))))
# data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
#             type = "response")
# tapply(data_predict_gam$fit,data_predict_gam$Genetic_Diversity,min)
# 
# 
# data_predict_gam


# Plot  block effect
mod_block_P <- tidymv::predict_gam(mod_final, 
                               exclude_terms = list("s(Population)"), 
                               values = list(Population = NULL))
ggplot(data = mod_block_P, aes(Generation_Eggs, fit)) +
    facet_wrap(~ Block) +
    tidymv::geom_smooth_ci(Genetic_Diversity)

########## EXCLUDING BLOCK EFFECT 
mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity  + 
                         s(Generation_Eggs, k=5) +
                         s(Population, bs="re"),  family = mgcv::nb(), 
                       data=data_dyn, method='REML')


mod_A_p <- tidymv::get_gam_predictions(model = mod_final,
                                       series = Generation_Eggs,
                                       exclude_random = TRUE,
                                       transform = exp)

tidymv::plot_smooths(mod_final, Generation_Eggs, Genetic_Diversity,
                     exclude_random = TRUE,
                     transform = exp)

# ## Other functions 
# data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
#                                Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
#                                                        each=length(seq(2,6,0.01))),
#                                Block = "Block4",
#                                Population = rep(c("High1","Med1","Low1"),
#                                                 each=length(seq(2,6,0.01))))
# library(mgcv)
# data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
#             exclude_terms = list("s(Population)", "Block"),
#             values = list(Population = NULL, Block = NULL),
#             type = "response")
#ggplot(data = data_predict_gam, aes(Generation_Eggs, fit))



# ## PLOT
# plot_GAM_Ucurve <- ggplot(data = mod_A_p, aes(Generation_Eggs, fit)) +
#   geom_point(data = data[data$Extinction_finalstatus=="no",], 
#              aes(x = Generation_Eggs,
#                                 y = Nb_adults,
#                                group = Genetic_Diversity,
#                                 colour = Genetic_Diversity),
#              position = position_dodge2(0.3), 
#              size = 0.6, alpha = 0.9) +
#   tidymv::plot_smooths(mod_final, Generation_Eggs, Genetic_Diversity,
#                      exclude_random = TRUE,
#                      transform = exp) +
#   tidymv::geom_smooth_ci(Genetic_Diversity,
#                  size = 1.5, linetype = 1)  +
#   ylab("Population size") +
#   xlab("Generation") +
#   scale_color_manual(name="Demographic history",
#                      breaks = c("High", "Medium", "Low"),
#                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
#                      values = c("#00AFBB","#FC4E07","#E7B800")) +
#  xlim(1.8,6.2) +
#  theme_LO  + 
#  theme(panel.grid.major.y = element_blank(),
#         panel.grid.minor.y = element_blank())
#  plot_GAM_Ucurve


 ## PLOT
plot_GAM_Ucurve <- ggplot(data = data, aes(Generation_Eggs, Nb_adults)) +
  geom_point(data = data[data$Extinction_finalstatus=="no",], 
             aes(x = Generation_Eggs,
                                y = Nb_adults,
                               group = Genetic_Diversity,
                                colour = Genetic_Diversity),
             position = position_dodge2(0.3), 
             size = 0.6, alpha = 0.9) +
  ggplot2::geom_ribbon(data = mod_A_p, 
                       aes(ymin = CI_lower, 
                           ymax = CI_upper, 
                           x = Generation_Eggs, 
                           group = Genetic_Diversity, 
                           fill = Genetic_Diversity, 
                           color =Genetic_Diversity), 
                       alpha=0.2, size=0.05) + 
  geom_line(data = mod_A_p,
            aes(x = Generation_Eggs, y = Nb_adults, color = Genetic_Diversity),
            size = 0.8, linetype = 1) + 
  ylab("Population size") +
  xlab("Generation") +
  scale_fill_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
 xlim(1.8,6.2) +
 theme_LO  + 
 theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
 plot_GAM_Ucurve

# 
# cowplot::save_plot(file = here::here("figures","PopulationSize_GAM.pdf"),
#                    plot_GAM_Ucurve, base_height = 10/cm(1),
#                    base_width = 13/cm(1), dpi = 610)
# 



#To estimate confidence interval at the end of the experiment
#Default on tidymv::geom_smooth_ci is ci_z = 1.96
#population size at the end
data_gam_end <- as.data.frame(mod_A_p[mod_A_p$Generation_Eggs==6,])


#minimum population size 
tapply(mod_A_p$Nb_adults,mod_A_p$Genetic_Diversity,min)
##     High   Medium      Low 
## 96.00447 65.15853 60.34392
mod_A_p[mod_A_p$Genetic_Diversity=="High"&mod_A_p$Nb_adults<=96.1,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults         SE
## 49    1              High        4.666667      High1  96.00447 0.08171657
##    CI_upper CI_lower
## 49 112.6808 81.79616
mod_A_p[mod_A_p$Genetic_Diversity=="Medium"&mod_A_p$Nb_adults<=65.2,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults         SE
## 50    3            Medium        4.666667      High1  65.15853 0.09348447
##    CI_upper CI_lower
## 50 78.26126 54.24949
mod_A_p[mod_A_p$Genetic_Diversity=="Low"&mod_A_p$Nb_adults<=60.4,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults         SE
## 51    2               Low        4.666667      High1  60.34392 0.08309636
##    CI_upper CI_lower
## 51 71.01769 51.27438
#### Plot with the interaction effect 
mod_final_interaction <- mgcv::gam(Nb_adults ~ Genetic_Diversity  + 
                    s(Generation_Eggs, k=5) + s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"),  family = mgcv::nb(), 
                    data=data_dyn,  method='REML')


# Plot interaction
mod_A_interaction <- tidymv::get_gam_predictions(model = mod_final_interaction,
                                       series = Generation_Eggs,
                                       exclude_random = TRUE,
                                       transform = exp)


plot_GAM_Ucurve_interaction <- ggplot(data = data, aes(Generation_Eggs, Nb_adults)) +
  geom_point(data = data[data$Extinction_finalstatus=="no",], 
             aes(x = Generation_Eggs,
                                y = Nb_adults,
                               group = Genetic_Diversity,
                                colour = Genetic_Diversity),
             position = position_dodge2(0.3), 
             size = 0.6, alpha = 0.9) +
  ggplot2::geom_ribbon(data = mod_A_interaction, 
                       aes(ymin = CI_lower, 
                           ymax = CI_upper, 
                           x = Generation_Eggs, 
                           group = Genetic_Diversity, 
                           fill = Genetic_Diversity, 
                           color =Genetic_Diversity), 
                       alpha=0.2, size=0.05) + 
  geom_line(data = mod_A_interaction,
            aes(x = Generation_Eggs, y = Nb_adults, color = Genetic_Diversity),
            size = 0.8, linetype = 1) + 
  ylab("Population size") +
  xlab("Generation") +
  scale_fill_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"),
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
                     values = c("#00AFBB","#FD7C49","#F5C400")) +
 xlim(1.8,6.2) +
 theme_LO  + 
 theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
 plot_GAM_Ucurve_interaction

#minimum population size 
mod_A_interaction[mod_A_interaction$Generation_Eggs==6,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults        SE
## 73    1              High               6      High1 143.53899 0.1074390
## 74    3            Medium               6      High1  74.57935 0.1284980
## 75    2               Low               6      High1  76.73883 0.1119476
##     CI_upper  CI_lower
## 73 177.18373 116.28292
## 74  95.93969  57.97475
## 75  95.56677  61.62026
tapply(mod_A_interaction$Nb_adults,mod_A_interaction$Genetic_Diversity,min)
##     High   Medium      Low 
## 99.81774 62.02463 58.36272
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="High"&mod_A_interaction$Nb_adults<=99.82,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults         SE
## 46    1              High             4.5      High1  99.81774 0.08377412
##    CI_upper CI_lower
## 46 117.6299  84.7028
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="Medium"&mod_A_interaction$Nb_adults<=62.03,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults        SE
## 53    3            Medium        4.833333      High1  62.02463 0.1025714
##    CI_upper CI_lower
## 53 75.83587 50.72869
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="Low"&mod_A_interaction$Nb_adults<=58.37,]
##    .idx Genetic_Diversity Generation_Eggs Population Nb_adults         SE
## 51    2               Low        4.666667      High1  58.36272 0.08935786
##    CI_upper CI_lower
## 51  69.5342 48.98607
legend_gamm <- lemon::g_legend(plot_GAM_Ucurve_interaction)

PLOT_ALL_GAMM <- cowplot::ggdraw() + 
  cowplot::draw_plot(plot_GAM_Ucurve + theme(legend.position = "none"), 
            x = 0.01, y = 0, width = 0.4, height = 0.95) + 
  cowplot::draw_plot(plot_GAM_Ucurve_interaction + theme(legend.position = "none"), 
            x = 0.4, y = 0, width = 0.4, height = 0.95) + 
  cowplot::draw_plot(legend_gamm, x = 0.84, y = 0.5, width = 0.1, height = 0.1) + 
  cowplot::draw_plot_label(c("A", 
                             "B"),   
                  x = c(0.01,0.4), 
                  y = c(1, 1), 
                  hjust = c(-0.25, -0.25), 
                  vjust = c(1.5, 1.5),
                  size = 17) + 
  cowplot::draw_plot_label(c("Best-fit model without interaction ", 
                             "Full model with interaction "),  
                  x = c(0.23, 0.63), 
                  y = c(1, 1), 
                  hjust = c(0.5, 0.5), 
                  vjust = c(1.5, 1.5),
                  size = 12) 
 PLOT_ALL_GAMM

# 
# cowplot::save_plot(file = here::here("figures","Fig3_Dynamics_GAMM.pdf"),
#                    PLOT_ALL_GAMM, base_height = 10/cm(1),
#                    base_width = 22/cm(1), dpi = 610)
# 

3.5 Variance over time: Coefficient of variation

SUM_CoefVar <- Rmisc::summarySE(data[data$Extinction_finalstatus=="no"&data$Generation_Eggs>=2,], 
                        measurevar = c("Nb_adults"),
                       groupvars = c("Genetic_Diversity","Generation_Eggs"), 
                       na.rm = TRUE)

SUM_CoefVar
##    Genetic_Diversity Generation_Eggs  N Nb_adults       sd        se       ci
## 1               High               2 27 344.29630 73.77294 14.197610 29.18360
## 2               High               3 27 151.85185 80.96899 15.582489 32.03027
## 3               High               4 26 114.00000 80.20224 15.728954 32.39439
## 4               High               5 27 103.62963 51.56486  9.923661 20.39838
## 5               High               6 27 147.66667 42.60282  8.198916 16.85311
## 6             Medium               2 18 271.27778 92.04526 21.695276 45.77303
## 7             Medium               3 18 135.94444 88.56867 20.875835 44.04416
## 8             Medium               4 18  78.94444 63.91858 15.065754 31.78596
## 9             Medium               5 18  68.27778 48.73434 11.486794 24.23502
## 10            Medium               6 18  73.61111 44.05496 10.383855 21.90802
## 11               Low               2 26 266.11538 74.47809 14.606356 30.08235
## 12               Low               3 26 112.73077 81.93000 16.067795 33.09224
## 13               Low               4 26  60.07692 35.26463  6.915962 14.24369
## 14               Low               5 25  63.72000 43.11179  8.622359 17.79567
## 15               Low               6 26  82.96154 42.28899  8.293553 17.08089
SUM_CoefVar$CoefVar <- (SUM_CoefVar$sd / SUM_CoefVar$Nb_adults) * 100
SUM_CoefVar
##    Genetic_Diversity Generation_Eggs  N Nb_adults       sd        se       ci
## 1               High               2 27 344.29630 73.77294 14.197610 29.18360
## 2               High               3 27 151.85185 80.96899 15.582489 32.03027
## 3               High               4 26 114.00000 80.20224 15.728954 32.39439
## 4               High               5 27 103.62963 51.56486  9.923661 20.39838
## 5               High               6 27 147.66667 42.60282  8.198916 16.85311
## 6             Medium               2 18 271.27778 92.04526 21.695276 45.77303
## 7             Medium               3 18 135.94444 88.56867 20.875835 44.04416
## 8             Medium               4 18  78.94444 63.91858 15.065754 31.78596
## 9             Medium               5 18  68.27778 48.73434 11.486794 24.23502
## 10            Medium               6 18  73.61111 44.05496 10.383855 21.90802
## 11               Low               2 26 266.11538 74.47809 14.606356 30.08235
## 12               Low               3 26 112.73077 81.93000 16.067795 33.09224
## 13               Low               4 26  60.07692 35.26463  6.915962 14.24369
## 14               Low               5 25  63.72000 43.11179  8.622359 17.79567
## 15               Low               6 26  82.96154 42.28899  8.293553 17.08089
##     CoefVar
## 1  21.42717
## 2  53.32104
## 3  70.35285
## 4  49.75880
## 5  28.85067
## 6  33.93026
## 7  65.15063
## 8  80.96653
## 9  71.37658
## 10 59.84825
## 11 27.98714
## 12 72.67758
## 13 58.69912
## 14 67.65818
## 15 50.97421
# library(DescTools)
# DescTools::CoefVar(data[data$Extinction_finalstatus=="no"&
#                           data$Generation_Eggs==2&
#                           data$Genetic_Diversity=="Low",]$Nb_adults, conf.level = 0.95)
data_extant <- data[data$Extinction_finalstatus=="no"&
                          data$Generation_Eggs>=2,]
# 
# tapply(data_extant$Nb_adults, 
#        list(data_extant$Generation_Eggs, 
#             data_extant$Genetic_Diversity), 
#        CoefVar, na.rm=TRUE)


SUM_CoefVar_DescTools <- SUM_CoefVar[,1:2]
SUM_CoefVar_DescTools$CoefVar <- NA
SUM_CoefVar_DescTools$CI_lower <- NA
SUM_CoefVar_DescTools$CI_upper <- NA 
SUM_CoefVar_DescTools$Mean <- NA 


for (i in levels(SUM_CoefVar_DescTools$Genetic_Diversity)) {
  for (j in c(2:6)) {
SUM_CoefVar_DescTools$CoefVar[SUM_CoefVar_DescTools$Genetic_Diversity==i&
                                 SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
                                 data_extant$Generation_Eggs==j,]$Nb_adults, 
                                     conf.level = 0.95, na.rm=TRUE)[1]
SUM_CoefVar_DescTools$CI_lower[SUM_CoefVar_DescTools$Genetic_Diversity==i&
                                 SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
                                 data_extant$Generation_Eggs==j,]$Nb_adults, 
                                     conf.level = 0.95, na.rm=TRUE)[2]
SUM_CoefVar_DescTools$CI_upper[SUM_CoefVar_DescTools$Genetic_Diversity==i&
                                 SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
                                 data_extant$Generation_Eggs==j,]$Nb_adults, 
                                     conf.level = 0.95, na.rm=TRUE)[3]

SUM_CoefVar_DescTools$Mean[SUM_CoefVar_DescTools$Genetic_Diversity==i&
                                 SUM_CoefVar_DescTools$Generation_Eggs==j] <- mean(data_extant[data_extant$Genetic_Diversity==i&
                   data_extant$Generation_Eggs==j,]$Nb_adults, 
     na.rm = TRUE)

  }
}
  



### Plot
ggplot2::ggplot(SUM_CoefVar, 
                aes(x = Genetic_Diversity,  
                    y = CoefVar, 
                    fill = Genetic_Diversity, 
                    color = Genetic_Diversity, 
                    shape = factor(Generation_Eggs))) + 
  #geom_boxplot(outlier.color = NA) +
  geom_point(size = 4, alpha = 1) +
  scale_shape_manual(name="Generation", values = c(21,22,23,24,25)) +
  scale_fill_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  ggplot2::scale_x_discrete(labels = c("Diverse","Intermediate bottleneck","Strong bottleneck")) + 
  guides(color = FALSE, fill = FALSE) + 
  ylab("Coefficient of variation (%)") +
  xlab("Demographic history") +
  theme_LO + 
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())

### Plot
plot_CV <- ggplot2::ggplot(SUM_CoefVar, 
                aes(x = factor(Generation_Eggs),  
                    y = CoefVar, 
                    color = Genetic_Diversity)) + 
  geom_point(size = 4, alpha = 1) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  ylab("Coefficient of variation (%)") +
  xlab("Generation") +
  theme_LO + 
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
plot_CV

plot_CV_function <- ggplot2::ggplot(SUM_CoefVar_DescTools, 
                aes(x = factor(Generation_Eggs),  
                    y = CoefVar, 
                    color = Genetic_Diversity)) + 
  geom_point(position = position_dodge(0.4), 
             size = 4, alpha = 1) +
  geom_errorbar(aes(x = factor(Generation_Eggs),
                    ymin = CI_lower,
                    ymax = CI_upper,
                    colour = Genetic_Diversity),
             position = position_dodge(0.4), size = 0.6, width=0, alpha = 0.8) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  ylab("Coefficient of variation") +
  xlab("Generation") +
  theme_LO + 
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
plot_CV_function

# cowplot::save_plot(here::here("figures","Fig4_CoefficientVariation.pdf"),   plot_CV_function,
#           base_height = 12/cm(1), base_width = 18/cm(1), dpi = 610)

# 


plot_testCV <- ggplot2::ggplot(SUM_CoefVar_DescTools, 
                aes(x = Mean,  
                    y = CoefVar, 
                    color = Genetic_Diversity)) + 
  geom_point(size = 4, alpha = 1) +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                     labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07", "#E7B800")) +
  theme_LO 

plot_testCV

# ## Statistic 
# mod0 <- lm(CoefVar ~ Genetic_Diversity, data=SUM_CoefVar)
# mod1 <- lm(CoefVar ~ 1, data=SUM_CoefVar)
# anova(mod0, mod1, test = "Chisq")
# 
# mod0 <- lm(CoefVar ~ Genetic_Diversity-1, data=SUM_CoefVar)
# summary(mod0)
# confint(mod0)

3.6 Growth rate G6

3.6.1 Distribution analysis

head(data_phenotyping)
##   Population   Week  Block ID_Rep Divsersity Population.1 Box Initial.N   Start
## 1      High1 5-week Block3     52       High            1   1        30  7/8/21
## 2      High1 5-week Block3     53       High            1   2        30  7/8/21
## 3     High13 5-week Block4    129       High           13   1        30 7/15/21
## 4     High13 5-week Block4    130       High           13   2        30 7/15/21
## 5     High13 5-week Block4    131       High           13   3        30 7/15/21
## 6     High14 5-week Block4    132       High           14   1        30 7/15/21
##       End Larvae Pupae Adults    Lambda Genetic_Diversity Nb_ttx  p_adults
## 1 8/12/21      3     9     97 3.2333333              High    109 0.8899083
## 2 8/12/21      1     1      8 0.2666667              High     10 0.8000000
## 3 8/19/21      2     0     84 2.8000000              High     86 0.9767442
## 4 8/19/21      1     2    104 3.4666667              High    107 0.9719626
## 5 8/19/21      2     1     84 2.8000000              High     87 0.9655172
## 6 8/19/21      1     0     83 2.7666667              High     84 0.9880952
##      p_pupae    p_larvae He_remain  He_start    He_exp    He_end
## 1 0.08256881 0.027522936 0.9986008 0.9986008 0.9679591 0.9666047
## 2 0.10000000 0.100000000 0.9986008 0.9986008 0.9679591 0.9666047
## 3 0.00000000 0.023255814 0.9986008 0.9986008 0.9786327 0.9772634
## 4 0.01869159 0.009345794 0.9986008 0.9986008 0.9786327 0.9772634
## 5 0.01149425 0.022988506 0.9986008 0.9986008 0.9786327 0.9772634
## 6 0.00000000 0.011904762 0.9986008 0.9986008 0.9766045 0.9752381
#Without considering extinct populations 

#tapply(data_phenotyping$Lambda,data_phenotyping$Population,length)

hist(data_phenotyping$Lambda, breaks=50)

# #Which distribution
fitdistrplus::descdist(data_phenotyping$Lambda, discrete = FALSE)

## summary statistics
## ------
## min:  0   max:  6.2 
## median:  2.1 
## mean:  2.213082 
## estimated sd:  1.058233 
## estimated skewness:  0.5497908 
## estimated kurtosis:  3.726017
#############################################################
################## Determine distribution ###################
#############################################################
#Test normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda, "norm")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
##     1-mle-norm 
## "not rejected"
plot(f1)

f1$aic
## [1] 551.898
#Test log-normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda+1, "lnorm")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
##    1-mle-lnorm 
## "not rejected"
plot(f1)

f1$aic
## [1] 552.5385
#Test exp
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda, "exp")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
##  1-mle-exp 
## "rejected"
plot(f1)

f1$aic
## [1] 669.5117
#Test log-normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda+1, "gamma")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
##    1-mle-gamma 
## "not rejected"
plot(f1)

f1$aic
## [1] 544.6686
## Poisson lognormal model
mlog <- lme4::lmer(log(Lambda+1) ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)
#summary(mlog)

# Square root
msqrt <- lme4::lmer(sqrt(Lambda) ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)
#summary(msqrt)

# Normal
mnorm <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)

# # Log Normal
# mlogN <- glm(log(Lambda) ~ Genetic_Diversity + Block,  
#                      family = gaussian, data=data_phenotyping)



## Compare AIC
AIC(mlog, msqrt,mnorm)
##       df      AIC
## mlog   7 120.8153
## msqrt  7 156.0901
## mnorm  7 528.8983
#Square root a better fits to the data 
#But we compare different values: Growth rate and Growth rate+1

# ## Simulate data 
x.teo.log <- unlist(simulate(mlog))
x.teo.sqrt <- unlist(simulate(msqrt))
x.teo.norm <- unlist(simulate(mnorm))

# ## QQplot to compared Negative binomial and Poisson log normal distributions
qqplot(data_phenotyping$Lambda, x.teo.log,
       main="QQ-plot", xlab="Observed data", ylab="Simulated data",
       las=1, xlim = c(0, 10), ylim = c(0, 10), col='blue') ## QQ-plot
points(sort(data_phenotyping$Lambda), sort(x.teo.sqrt), pch=1, col='red')
points(sort(data_phenotyping$Lambda), sort(x.teo.norm), pch=1, col='green')
abline(0,1)
# ## Add legend
legend(0, 7, legend=c("Log", "Sqrt", "Normal"),
       col=c("blue", "red", "green"), 
       pch=1, bty="n")

# ## Normal distribution provides a good fit to the data


# QQ plot
# qqnorm(resid(mlog))
# qqline(resid(mlog))
# qqnorm(resid(msqrt))
# qqline(resid(msqrt))
# qqnorm(resid(mnorm))
# qqline(resid(mnorm)) #best fit

#Distribution Residuals
hist(residuals(mlog),col="yellow",freq=F)

hist(residuals(msqrt),col="yellow",freq=F)  #Seems the better fit

hist(residuals(mnorm),col="yellow",freq=F) #Seems the better fit

#Test normality
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(mnorm), "norm"))
g1$kstest 
##     1-mle-norm 
## "not rejected"
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(msqrt), "norm"))
g1$kstest 
##     1-mle-norm 
## "not rejected"
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(mnorm), "norm"))
g1$kstest
##     1-mle-norm 
## "not rejected"
## Compare AIC
AIC(mlog, msqrt, mnorm)
##       df      AIC
## mlog   7 120.8153
## msqrt  7 156.0901
## mnorm  7 528.8983
#SLog a better fits to the data 

3.6.2 Analysis

#############################################################
##################        Analysis        ###################
#############################################################

mod5 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)
# summary(mod0)
mod1 <- lme4::lmer(Lambda ~ Block + (1|Population), 
                   data=data_phenotyping)
anova(mod5,mod1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## mod1: Lambda ~ Block + (1 | Population)
## mod5: Lambda ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod1    5 532.53 548.66 -261.26   522.53                         
## mod5    7 520.90 543.48 -253.45   506.90 15.635  2  0.0004027 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# -2*(logLik(mod0)-logLik(mod1)) 
# attr(logLik(mod1), "df")-attr(logLik(mod0), "df") 
# lmtest::lrtest(mod1,mod0)



############ 
###### Posthoc
############ 
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean     SE  df lower.CL upper.CL
##  High                4.63 0.0751 323     4.48     4.78
##  Medium              4.23 0.0828 323     4.06     4.39
##  Low                 4.10 0.0774 323     3.95     4.25
## 
## Results are averaged over the levels of: Population, Block 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate     SE  df t.ratio p.value
##  High - Medium    0.406 0.0811 323 5.001   <.0001 
##  High - Low       0.532 0.0752 323 7.071   <.0001 
##  Medium - Low     0.126 0.0830 323 1.518   0.2837 
## 
## Results are averaged over the levels of: Population, Block 
## Results are given on the log (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates
############ 
###### CONFIDENCE INTERVAL 
############
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity-1 + Block + (1|Population), 
                   data=data_phenotyping)
summary(mod0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Lambda ~ Genetic_Diversity - 1 + Block + (1 | Population)
##    Data: data_phenotyping
## 
## REML criterion at convergence: 514.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3747 -0.6242 -0.0778  0.5409  3.4451 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  Population (Intercept) 0.2321   0.4817  
##  Residual               0.7480   0.8649  
## Number of obs: 186, groups:  Population, 57
## 
## Fixed effects:
##                          Estimate Std. Error t value
## Genetic_DiversityHigh    2.569709   0.193717  13.265
## Genetic_DiversityMedium  1.696343   0.227931   7.442
## Genetic_DiversityLow     1.868811   0.190682   9.801
## BlockBlock4              0.220484   0.214487   1.028
## BlockBlock5             -0.003695   0.249586  -0.015
## 
## Correlation of Fixed Effects:
##             Gnt_DH Gnt_DM Gnt_DL BlckB4
## Gntc_DvrstM  0.369                     
## Gntc_DvrstL  0.331  0.235              
## BlockBlock4 -0.636 -0.457 -0.441       
## BlockBlock5 -0.592 -0.410 -0.331  0.451
confint(mod0)
##                              2.5 %    97.5 %
## .sig01                   0.1878335 0.6406211
## .sigma                   0.7698401 0.9855240
## Genetic_DiversityHigh    2.2034040 2.9372360
## Genetic_DiversityMedium  1.2630197 2.1273737
## Genetic_DiversityLow     1.5040561 2.2293763
## BlockBlock4             -0.1881822 0.6259567
## BlockBlock5             -0.4815724 0.4683096
############ 
###### PREDICT
############
data_predict_extinct <- data.frame(Genetic_Diversity = levels(data_phenotyping$Genetic_Diversity),
                                   Block = "Block4")

data_predict_extinct$predict <- predict(mod0, 
                                        type = "response",
                                        re.form = NA,
                                        newdata = data_predict_extinct)



SUM_GrowthRate <- Rmisc::summarySE(data_phenotyping, 
                        measurevar = c("Lambda"),
                       groupvars = c("Genetic_Diversity","Week"), 
                       na.rm = TRUE)

3.7 Remaining He difference

Rmisc::summarySE(data[data$Generation_Eggs==2,],
          measurevar = c("He_remain"),
          groupvars = c("Genetic_Diversity"),
          na.rm=TRUE)
##   Genetic_Diversity  N He_remain          sd           se           ci
## 1              High 27 0.9918493 0.001771315 0.0003408897 0.0007007089
## 2            Medium 23 0.6743040 0.006431259 0.0013410101 0.0027810847
## 3               Low 34 0.5404225 0.012690807 0.0021764555 0.0044280320
mfull <- lm(He_remain ~ Genetic_Diversity, data=data[data$Generation_Eggs==2,])
mless <- lm(He_remain ~ 1, data=data[data$Generation_Eggs==2,])
anova(mfull,mless) #Keep effect
## Analysis of Variance Table
## 
## Model 1: He_remain ~ Genetic_Diversity
## Model 2: He_remain ~ 1
##   Res.Df     RSS Df Sum of Sq     F    Pr(>F)    
## 1     81 0.00631                                 
## 2     83 3.14572 -2   -3.1394 20162 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############ 
###### Posthoc
############ 
emmeans::emmeans(mfull, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean       SE df lower.CL upper.CL
##  High              0.9918 0.001698 81   0.9877   0.9960
##  Medium            0.6743 0.001840 81   0.6698   0.6788
##  Low               0.5404 0.001513 81   0.5367   0.5441
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate      SE df t.ratio p.value
##  High - Medium    0.318 0.00250 81 126.829 <.0001 
##  High - Low       0.451 0.00227 81 198.470 <.0001 
##  Medium - Low     0.134 0.00238 81  56.200 <.0001 
## 
## P value adjustment: tukey method for comparing a family of 3 estimates
Rmisc::summarySE(data[data$Generation_Eggs==2,],
          measurevar = c("He_remain"),
          groupvars = c("Genetic_Diversity"),
          na.rm=TRUE)
##   Genetic_Diversity  N He_remain          sd           se           ci
## 1              High 27 0.9918493 0.001771315 0.0003408897 0.0007007089
## 2            Medium 23 0.6743040 0.006431259 0.0013410101 0.0027810847
## 3               Low 34 0.5404225 0.012690807 0.0021764555 0.0044280320
#Calcul for end:
Rmisc::summarySE(data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",],
          measurevar = c("He_end"),
          groupvars = c("Genetic_Diversity"),
          na.rm=TRUE)
##   Genetic_Diversity  N    He_end         sd          se          ci
## 1              High 27 0.9663114 0.01931641 0.003717444 0.007641317
## 2            Medium 18 0.6378048 0.03493729 0.008234799 0.017373906
## 3               Low 26 0.5090656 0.03443416 0.006753094 0.013908257
mfull <- lm(He_end ~ Genetic_Diversity, data=data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",])
mless <- lm(He_end ~ 1, data=data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",])
anova(mfull,mless) #Keep effect
## Analysis of Variance Table
## 
## Model 1: He_end ~ Genetic_Diversity
## Model 2: He_end ~ 1
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     68 0.06009                                  
## 2     70 2.97522 -2   -2.9151 1649.3 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############ 
###### Posthoc
############ 
emmeans::emmeans(mfull, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean      SE df lower.CL upper.CL
##  High               0.966 0.00572 68    0.952    0.980
##  Medium             0.638 0.00701 68    0.621    0.655
##  Low                0.509 0.00583 68    0.495    0.523
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate      SE df t.ratio p.value
##  High - Medium    0.329 0.00905 68 36.316  <.0001 
##  High - Low       0.457 0.00817 68 55.978  <.0001 
##  Medium - Low     0.129 0.00912 68 14.124  <.0001 
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

3.8 Growth rate and He

#Best model with genetic diversity
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)

mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod1 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod2 <- lme4::lmer(Lambda ~ Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod3 <- lme4::lmer(Lambda ~ log(He_end) + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod5 <- lme4::lmer(Lambda ~ exp(He_end) + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
MuMIn::model.sel(mod0, mod1,mod2,mod3,mod4,mod5)
## Model selection table 
##       (Int) Blc Gnt_Dvr He_end log(He_end) exp(He_end)             family df
## mod1 0.8154   +          1.766                         gaussian(identity)  6
## mod5 0.3445   +                                 0.8311 gaussian(identity)  6
## mod3 2.5470   +                      1.259             gaussian(identity)  6
## mod0 2.5700   +       +                                gaussian(identity)  7
## mod2 2.0770   +                                        gaussian(identity)  5
## mod4 2.0770   +                                        gaussian(identity)  5
##        logLik  AICc delta weight
## mod1 -257.506 527.5  0.00  0.408
## mod5 -258.097 528.7  1.18  0.226
## mod3 -258.156 528.8  1.30  0.213
## mod0 -257.449 529.5  2.05  0.147
## mod2 -263.551 537.4  9.95  0.003
## mod4 -263.551 537.4  9.95  0.003
## Models ranked by AICc(x) 
## Random terms (all models): 
## '1 | Population'
#Makes sense: best model log He
x <- seq(1:100)
y <- seq(1001:1100)
plot(log(x),y)

#############################################################
##################        Analysis        ###################
#############################################################

mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),], REML=FALSE)
mod4 <- lme4::lmer(Lambda ~ Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),], REML=FALSE)
anova(mod3,mod4,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod4: Lambda ~ Block + (1 | Population)
## mod3: Lambda ~ He_end + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod4    5 532.53 548.66 -261.26   522.53                         
## mod3    6 521.90 541.26 -254.95   509.90 12.627  1  0.0003802 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# logLik(mod1)
# lmtest::lrtest(mod4,mod3)




#############################################################
##################        Predict         ###################
#############################################################

#http://rstudio-pubs-static.s3.amazonaws.com/7024_e3a68a9b35454e74abfe15b621c50502.html
mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
SUM_MOD <- summary(mod3)


stats::confint(mod3)
##                   2.5 %    97.5 %
## .sig01       0.23159556 0.6685194
## .sigma       0.76963964 0.9848593
## (Intercept)  0.09849481 1.5283487
## He_end       0.85203146 2.6836107
## BlockBlock4 -0.19603632 0.6401347
## BlockBlock5 -0.48746506 0.4823031
#confint(mod3, method="Wald")
(estimate_He <- SUM_MOD$coefficients["He_end","Estimate"])
## [1] 1.76556
(IC95_lower <- stats::confint(mod3)["He_end","2.5 %"])
## [1] 0.8520315
(IC95_upper <- stats::confint(mod3)["He_end","97.5 %"])
## [1] 2.683611
#PREDICT
data_predict_lambda <- data.frame(He_end =
  seq(min(data_phenotyping[!is.na(data_phenotyping$He_end),]$He_end-0.02),
      max(data_phenotyping[!is.na(data_phenotyping$He_end),]$He_end+0.02),
      0.01),
                                   Block = "Block4")



data_predict_lambda$lambda_predict <- predict(mod3, type = "response",
                                        re.form = NA,
                                        newdata = data_predict_lambda)






#Confidence interval
bb <- lme4::bootMer(mod3,
              FUN=function(x)predict(x, 
                                     newdata = data_predict_lambda,
                                     re.form=NA),
              nsim=1000)
# Take quantiles of predictions from bootstrap replicates.
# These represent the confidence interval of the mean at any value of Time.
data_predict_lambda$CI_lower <- apply(bb$t, 2, quantile, 0.025)
data_predict_lambda$CI_upper <- apply(bb$t, 2, quantile, 0.975)



# 
# cbind(data_predict_lambda,predict(mod3, newdata = data_predict_lambda, 
#                                   interval = "confidence", level = 0.95))

# cbind(data_predict_lambda, predict(mod3, newdata = data_predict_lambda, re.form = NA,
#                      interval = "confidence", level = 0.95))
# 


### Plot 
# Remove pseudoreplication: average of each population
SUM_POP_He_Mean<- Rmisc::summarySE(data_phenotyping,
                            measurevar = c("Lambda"),
                            groupvars = c("Genetic_Diversity",
                                          "He_end",
                                          "Population"), 
                            na.rm=TRUE)

  
PLOT_He_expect <- ggplot2::ggplot(SUM_POP_He_Mean) + 
  ggplot2::geom_ribbon(data = data_predict_lambda, 
                       aes(ymin = CI_lower, 
                           ymax = CI_upper, 
                           x = He_end), 
                       fill="grey80", alpha=0.3) + 
  geom_line(data = data_predict_lambda,
            aes(x = He_end, y = lambda_predict),
            col = "grey40", size = 1.5, linetype = 1) + 
  geom_errorbar(data = SUM_POP_He_Mean, aes(x = He_end, 
                                ymin = Lambda-se, ymax = Lambda+se,
                                colour = Genetic_Diversity), 
             size = 0.6, width=0, alpha = 0.6) + 
  geom_point(data = SUM_POP_He_Mean, 
             aes(x = He_end,  y = Lambda,
                 colour = Genetic_Diversity), 
             size = 3, alpha = 0.9) +
  ylab("Growth rate") +
  xlab("Expected heterozygosity") +
  scale_color_manual(name="Demographic history",
                     breaks = c("High", "Medium", "Low"), 
                    labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"), 
                    values = c("#00AFBB","#FC4E07","#E7B800")) +
  theme_LO + 
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank())
PLOT_He_expect

# cowplot::save_plot(here::here("figures","Fig5_GrowthRate_He.pdf"),   PLOT_He_expect,
#           base_height = 12/cm(1), base_width = 18/cm(1), dpi = 610)

3.9 Adults G6

m0 <- lme4::glmer(Adults ~ Genetic_Diversity  + (1|Population) + (1|ID_Rep:Population),
                family = "poisson", data = data_phenotyping)
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: 
## Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
##    Data: data_phenotyping
## 
##      AIC      BIC   logLik deviance df.resid 
##   1818.1   1834.3   -904.1   1808.1      181 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.23572 -0.14532  0.02672  0.14825  0.41394 
## 
## Random effects:
##  Groups            Name        Variance Std.Dev.
##  ID_Rep:Population (Intercept) 0.1739   0.4170  
##  Population        (Intercept) 0.1085   0.3294  
## Number of obs: 186, groups:  ID_Rep:Population, 186; Population, 57
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               4.2943     0.0812  52.884  < 2e-16 ***
## Genetic_DiversityMedium  -0.5132     0.1438  -3.569 0.000358 ***
## Genetic_DiversityLow     -0.3047     0.1295  -2.352 0.018660 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Gnt_DM
## Gntc_DvrstM -0.563       
## Gntc_DvrstL -0.625  0.356
# Equivalent to:
# m0 <- lme4::glmer(Adults ~ Genetic_Diversity  + (1|Population/ID_Rep),
#                 family = "poisson", data = data_5week)


#dispersion_lme4::glmer(m0) #dispersion ok 
#Note: (1|School) + (1|Class:School) is equivalent to (1|School/Class)

m0 <- lme4::glmer(Adults ~ Genetic_Diversity  + (1|Population) + (1|ID_Rep:Population),
                family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1  + (1|Population) + (1|ID_Rep:Population),
                family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Population) + (1 | ID_Rep:Population)
## m0: Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)   
## m1    3 1826.3 1836.0 -910.15   1820.3                        
## m0    5 1818.1 1834.3 -904.07   1808.1 12.164  2   0.002284 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: 
## Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
##    Data: data_phenotyping
## 
##      AIC      BIC   logLik deviance df.resid 
##   1818.1   1834.3   -904.1   1808.1      181 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.23572 -0.14532  0.02672  0.14825  0.41394 
## 
## Random effects:
##  Groups            Name        Variance Std.Dev.
##  ID_Rep:Population (Intercept) 0.1739   0.4170  
##  Population        (Intercept) 0.1085   0.3294  
## Number of obs: 186, groups:  ID_Rep:Population, 186; Population, 57
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               4.2943     0.0812  52.884  < 2e-16 ***
## Genetic_DiversityMedium  -0.5132     0.1438  -3.569 0.000358 ***
## Genetic_DiversityLow     -0.3047     0.1295  -2.352 0.018660 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Gnt_DM
## Gntc_DvrstM -0.563       
## Gntc_DvrstL -0.625  0.356
############ 
###### Posthoc
############ 
emmeans::emmeans(m0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean     SE  df asymp.LCL asymp.UCL
##  High                4.29 0.0812 Inf      4.10      4.49
##  Medium              3.78 0.1188 Inf      3.50      4.06
##  Low                 3.99 0.1011 Inf      3.75      4.23
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate    SE  df z.ratio p.value
##  High - Medium    0.513 0.144 Inf  3.569  0.0010 
##  High - Low       0.305 0.130 Inf  2.352  0.0489 
##  Medium - Low    -0.208 0.156 Inf -1.340  0.3728 
## 
## Results are given on the log (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates
#test double nested
m0 <- lme4::glmer(Adults ~ Genetic_Diversity  + (1|Genetic_Diversity/Population/ID_Rep),
                family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1  + (1|Genetic_Diversity/Population/ID_Rep),
                family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq") 
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Genetic_Diversity/Population/ID_Rep)
## m0: Adults ~ Genetic_Diversity + (1 | Genetic_Diversity/Population/ID_Rep)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1    4 1823.6 1836.5 -907.81   1815.6                       
## m0    6 1820.1 1839.5 -904.07   1808.1 7.4783  2    0.02377 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m0 <- lme4::glmer(Adults ~ Genetic_Diversity  + (1|Genetic_Diversity/ID_Rep),
                family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1  + (1|Genetic_Diversity/ID_Rep),
                family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq") 
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Genetic_Diversity/ID_Rep)
## m0: Adults ~ Genetic_Diversity + (1 | Genetic_Diversity/ID_Rep)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)   
## m1    3 1844.6 1854.3 -919.30   1838.6                        
## m0    5 1838.5 1854.6 -914.26   1828.5 10.086  2   0.006455 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.10 Adults G2

m0 <- glm(Nb_adults ~ Genetic_Diversity + Block,  family = "poisson", data = data[data$Generation_Eggs==2,])
summary(m0)
## 
## Call:
## glm(formula = Nb_adults ~ Genetic_Diversity + Block, family = "poisson", 
##     data = data[data$Generation_Eggs == 2, ])
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -18.7057   -2.7621    0.2126    3.3983   11.4454  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              5.73890    0.01497 383.312  < 2e-16 ***
## Genetic_DiversityMedium -0.19408    0.01628 -11.920  < 2e-16 ***
## Genetic_DiversityLow    -0.19278    0.01460 -13.205  < 2e-16 ***
## BlockBlock4              0.10767    0.01579   6.817  9.3e-12 ***
## BlockBlock5              0.17743    0.01600  11.088  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2379.6  on 83  degrees of freedom
## Residual deviance: 2027.9  on 79  degrees of freedom
## AIC: 2667.2
## 
## Number of Fisher Scoring iterations: 4
m1<- glm(Nb_adults ~ Block,  family = "poisson", data = data[data$Generation_Eggs==2,])
anova(m0,m1,test="Chisq") # difference
## Analysis of Deviance Table
## 
## Model 1: Nb_adults ~ Genetic_Diversity + Block
## Model 2: Nb_adults ~ Block
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        79     2027.9                          
## 2        81     2241.4 -2  -213.52 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############ 
###### Posthoc
############ 
emmeans::emmeans(m0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean      SE  df asymp.LCL asymp.UCL
##  High               5.834 0.01051 Inf     5.809     5.859
##  Medium             5.640 0.01243 Inf     5.610     5.670
##  Low                5.641 0.01022 Inf     5.617     5.666
## 
## Results are averaged over the levels of: Block 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate     SE  df z.ratio p.value
##  High - Medium   0.1941 0.0163 Inf 11.920  <.0001 
##  High - Low      0.1928 0.0146 Inf 13.205  <.0001 
##  Medium - Low   -0.0013 0.0161 Inf -0.081  0.9964 
## 
## Results are averaged over the levels of: Block 
## Results are given on the log (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates

3.11 Proportion life stage

##### P_adults 
mod0 <- lme4::glmer(p_adults ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_adults ~ 1 + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping_4week
## Models:
## mod1: p_adults ~ 1 + Block + (1 | Population)
## mod0: p_adults ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod1    4 723.41 733.18 -357.70   715.41                     
## mod0    6 722.93 737.58 -355.46   710.93 4.4827  2     0.1063
rm(mod0,mod1)


mod0 <- lme4::glmer(p_adults ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_adults ~ 1 + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping
## Models:
## mod1: p_adults ~ 1 + Block + (1 | Population)
## mod0: p_adults ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod1    4 893.45 906.31 -442.73   885.45                    
## mod0    6 896.17 915.46 -442.08   884.17 1.284  2     0.5262
rm(mod0,mod1)


##### P_adults 
mod0 <- lme4::glmer(p_pupae ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_pupae ~ 1 + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping_4week
## Models:
## mod1: p_pupae ~ 1 + Block + (1 | Population)
## mod0: p_pupae ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod1    4 692.89 702.67 -342.45   684.89                     
## mod0    6 692.61 707.26 -340.30   680.61 4.2861  2     0.1173
rm(mod0,mod1)

mod0 <- lme4::glmer(p_pupae ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_pupae ~ 1 + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping
## Models:
## mod1: p_pupae ~ 1 + Block + (1 | Population)
## mod0: p_pupae ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod1    4 539.89 552.75 -265.95   531.89                    
## mod0    6 542.26 561.54 -265.13   530.26 1.639  2     0.4407
rm(mod0,mod1)

##### P_larvae 
mod0 <- lme4::glmer(p_larvae ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_larvae ~ 1 + Block + (1|Population), 
                   data = data_phenotyping_4week,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference and convergence issue
## Data: data_phenotyping_4week
## Models:
## mod1: p_larvae ~ 1 + Block + (1 | Population)
## mod0: p_larvae ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## mod1    4 441.93 451.71 -216.97   433.93                    
## mod0    6 445.08 459.74 -216.54   433.08 0.853  2     0.6528
mod0 <- lme4::glmer(p_larvae ~ Genetic_Diversity + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"),
                   weights = Nb_ttx)
mod1 <- lme4::glmer(p_larvae ~ 1 + Block + (1|Population), 
                   data = data_phenotyping,family = binomial(link = "logit"), 
                    weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference and convergence issue
## Data: data_phenotyping
## Models:
## mod1: p_larvae ~ 1 + Block + (1 | Population)
## mod0: p_larvae ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## mod1    4 737.53 750.39 -364.77   729.53                     
## mod0    6 740.62 759.91 -364.31   728.62 0.9082  2      0.635

3.12 Multinomial life stage

# data(alligator)
# head(alligator)
# fit <- multinom(food ~ lake+size+sex, data=alligator, weights=count)
# summary(fit$fitted.values)
data_TEMP <- data_phenotyping_all[,c(1:5,11:13,23)]
data_multinom <- tidyr::gather(data_TEMP,"stage","count",6:8)
data_multinom$Divsersity <- as.factor(data_multinom$Divsersity )
head(data_multinom)
##   Population   Week  Block ID_Rep Divsersity    He_end  stage count
## 1      High1 5-week Block3     52       High 0.9666047 Larvae     3
## 2      High1 5-week Block3     53       High 0.9666047 Larvae     1
## 3     High13 5-week Block4    129       High 0.9772634 Larvae     2
## 4     High13 5-week Block4    130       High 0.9772634 Larvae     1
## 5     High13 5-week Block4    131       High 0.9772634 Larvae     2
## 6     High14 5-week Block4    132       High 0.9752381 Larvae     1
# 
# fit <- nnet::multinom(stage ~ Block + Divsersity + Week + 
#                   ID_Rep , data=data_multinom, weights=count)
# summary(fit$fitted.values)
# fit
# 
# fit0 <- nnet::multinom(stage ~ Block + Divsersity + 
#                    ID_Rep , data=data_multinom, weights=count)
# anova(fit,fit0)

# 
# fit1 <- nnet::multinom(stage ~ Block +  Week + ID_Rep, data=data_multinom, weights=count)
# anova(fit1,fit)
# 
# 
# ### New function to test random effect MCLOGIT
# # data_TEMP <- data_phenotyping_all[,c(1:5,17:19,23)]
# # data_multinom <- tidyr::gather(data_TEMP,"stage","prop",6:8)
# # data_multinom$Divsersity <- as.factor(data_multinom$Divsersity )
# # head(data_multinom)
# 
# 
# #Other example
# library("AER")
# library("mlogit")


#### 1- Dataset 1: Train transport
# data("TravelMode", package="AER")
# head(TravelMode)
# #Here: Each individual can choose across 4 modes of transports: air train bus or car
#   #We can determine if this choice is random 
#     #or determined by different variables as: vcost, travel, etc. 
# 
# TM <- mlogit::mlogit.data(data = TravelMode,   #dataset
#                           choice = "choice",   #Variable containing the choice: yes or no
#                           shape = "long",      #type of  dataset
#                           alt.levels = "mode") #Variable contianing the list of choice 
# 
# fit0 <- mlogit::mlogit.data(data=data_multinom, 
#                             choice = "count", 
#                             alt.levels = "stage", 
#                             shape = "long")
# 
# 




#### 2- Dataset 2: Fishing price 
#https://rdrr.io/cran/mlogit/man/mlogit.html

# data("Fishing", package = "mlogit")
# head(Fishing)
# #There are: 
      #two alternative specific variables : price and catch one individual
      #specific variable (income) 
      #four fishing mode : beach, pier, boat, charter

#A dataframe containing :
  #mode: recreation mode choice, one of : beach, pier, boat and charter,
  #price.beach: price for beach mode
  #price.pier: price for pier mode,
  #price.boat: price for private boat mode,
  #price.charter: price for charter boat mode,
  #catch.beach: catch rate for beach mode,
  #catch.pier: catch rate for pier mode,
  #catch.boat: catch rate for private boat mode,
  #catch.charter: catch rate for charter boat mode,
  #income: monthly income,










#### Discussion with Ann Hess 
## Response is stage
## Weight or group with number 
## As Random effect: ID_Box (6 values)
## As Random effect: Population (one to 6 replicates per population)
## As fixed effect: diversity x Time 
data_multinom$stage <- as.factor(data_multinom$stage)
data_multinom$stage <- factor(data_multinom$stage, levels = c("Larvae", "Pupae", "Adults"))

 
# fit <- nnet::multinom(stage ~ Block + Divsersity + Week + 
#                   ID_Rep , data=data_multinom, weights=count)

# mod0 <- ordinal::clmm(stage ~ Block + Divsersity * Week + 
#                         (1|ID_Rep) + (1|Population), 
#                       data=data_multinom, weights=count)
mod1 <- ordinal::clmm(stage ~ Block + Divsersity + Week +
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
mod2 <- ordinal::clmm(stage ~ Block + Divsersity  + 
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
mod3 <- ordinal::clmm(stage ~ Block + Week  +
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
#anova(mod1,mod0) #sign
anova(mod1,mod2) #sign
## Likelihood ratio tests of cumulative link models:
##  
##      formula:                                                            link:
## mod2 stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population)        logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
##      threshold:
## mod2 flexible  
## mod1 flexible  
## 
##      no.par   AIC  logLik LR.stat df Pr(>Chisq)    
## mod2      8 16875 -8429.7                          
## mod1      9 14842 -7412.2  2035.1  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod1,mod3) # non sign
## Likelihood ratio tests of cumulative link models:
##  
##      formula:                                                            link:
## mod3 stage ~ Block + Week + (1 | ID_Rep) + (1 | Population)              logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
##      threshold:
## mod3 flexible  
## mod1 flexible  
## 
##      no.par   AIC  logLik LR.stat df Pr(>Chisq)
## mod3      7 14838 -7412.2                      
## mod1      9 14842 -7412.2   0.127  2     0.9385
-2*(logLik(mod2)-logLik(mod1)) 
## 'log Lik.' 2035.075 (df=8)
lmtest::lrtest(mod2,mod1)
## Likelihood ratio test
## 
## Model 1: stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population)
## Model 2: stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population)
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   8 -8429.7                         
## 2   9 -7412.2  1 2035.1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lmtest::lrtest(mod3,mod1)
## Likelihood ratio test
## 
## Model 1: stage ~ Block + Week + (1 | ID_Rep) + (1 | Population)
## Model 2: stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population)
##   #Df  LogLik Df Chisq Pr(>Chisq)
## 1   7 -7412.2                    
## 2   9 -7412.2  2 0.127     0.9385
mod3 <- ordinal::clmm(stage ~ Week + Block +
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
mod3$coefficients
## Larvae|Pupae Pupae|Adults   Week5-week  BlockBlock4  BlockBlock5 
##   -2.6968695   -0.8006353    2.6176503    0.1047649    0.1707286
confint(mod3)
##                   2.5 %     97.5 %
## Larvae|Pupae -3.0357006 -2.3580384
## Pupae|Adults -1.1328356 -0.4684350
## Week5-week    2.4833263  2.7519744
## BlockBlock4  -0.3443890  0.5539188
## BlockBlock5  -0.3816695  0.7231267
sigma(mod3)
## numeric(0)
summary(mod3)
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: stage ~ Week + Block + (1 | ID_Rep) + (1 | Population)
## data:    data_multinom
## 
##  link  threshold nobs  logLik   AIC      niter     max.grad cond.H 
##  logit flexible  19277 -7412.25 14838.49 423(2995) 3.24e-03 1.8e+02
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  ID_Rep     (Intercept) 0.5185   0.7200  
##  Population (Intercept) 0.3401   0.5832  
## Number of groups:  ID_Rep 189,  Population 62 
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## Week5-week   2.61765    0.06853  38.195   <2e-16 ***
## BlockBlock4  0.10476    0.22916   0.457    0.648    
## BlockBlock5  0.17073    0.28184   0.606    0.545    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##              Estimate Std. Error z value
## Larvae|Pupae  -2.6969     0.1729 -15.600
## Pupae|Adults  -0.8006     0.1695  -4.724
## (261 observations deleted due to missingness)

4 SUMMARY ALL ANALYSIS

######### Proba of extinction 
mod0 <- glm(y ~ Genetic_Diversity + Block,  
      data = data_proba_extinction, family = binomial)
mod1 <- glm(y ~ Block ,  
      data = data_proba_extinction, family = binomial)
anova(mod1, mod0, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1        81     58.903                        
## 2        79     46.833  2   12.069 0.002394 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean       SE  df asymp.LCL asymp.UCL
##  High              -20.07 1855.743 Inf  -4451.10  4410.961
##  Medium             -1.77    0.650 Inf     -3.32    -0.216
##  Low                -1.56    0.532 Inf     -2.83    -0.290
## 
## Results are averaged over the levels of: Block 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate       SE  df z.ratio p.value
##  High - Medium  -18.300 1855.743 Inf -0.010  0.9999 
##  High - Low     -18.506 1855.743 Inf -0.010  0.9999 
##  Medium - Low    -0.206    0.751 Inf -0.274  0.9594 
## 
## Results are averaged over the levels of: Block 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates
#########Time of extinction
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity + Block,
                    data = data_timeextinction, family = "poisson")
mod0 <- glm(Generation_Eggs ~  Block,
                    data = data_timeextinction, family = "poisson")
anova(mod0, mod1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        10    0.95459                     
## 2         9    0.74888  1  0.20571   0.6502
#########Dynamic over time 
# Test effect
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +   s(Generation_Eggs, k=5) +
                    s(Generation_Eggs, by = Genetic_Diversity, k=5) + 
                    s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block  +  s(Generation_Eggs, k=5) +
                    s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')

itsadug::compareML(mod_1, mod_2)
## mod_1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population, 
##     bs = "re")
## 
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) + 
##     s(Population, bs = "re")
## 
## Chi-square test of ML scores
## -----
##   Model    Score Edf Difference    Df p.value Sig.
## 1 mod_2 1977.920   8                              
## 2 mod_1 1974.918  14      3.003 6.000   0.423     
## 
## AIC difference: -4.21, model mod_1 has lower AIC.
#########Growth rate 
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population), 
                   data=data_phenotyping)
mod1 <- lme4::lmer(Lambda ~ Block + (1|Population), 
                   data=data_phenotyping)
anova(mod0,mod1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## mod1: Lambda ~ Block + (1 | Population)
## mod0: Lambda ~ Genetic_Diversity + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod1    5 532.53 548.66 -261.26   522.53                         
## mod0    7 520.90 543.48 -253.45   506.90 15.635  2  0.0004027 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey") 
## $`emmeans of Genetic_Diversity`
##  Genetic_Diversity emmean    SE   df lower.CL upper.CL
##  High                2.64 0.135 42.2     2.31     2.98
##  Medium              1.77 0.198 51.5     1.28     2.26
##  Low                 1.94 0.177 58.2     1.51     2.38
## 
## Results are averaged over the levels of: Block 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $`pairwise differences of Genetic_Diversity`
##  contrast      estimate    SE   df t.ratio p.value
##  High - Medium    0.873 0.239 48.0  3.653  0.0018 
##  High - Low       0.701 0.223 52.5  3.143  0.0076 
##  Medium - Low    -0.172 0.261 53.0 -0.660  0.7873 
## 
## Results are averaged over the levels of: Block 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
#########Growth rate vs. He
mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
anova(mod3,mod4,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod4: Lambda ~ 1 + Block + (1 | Population)
## mod3: Lambda ~ He_end + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod4    5 532.53 548.66 -261.26   522.53                         
## mod3    6 521.90 541.26 -254.95   509.90 12.627  1  0.0003802 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
########Life stage
mod1 <- ordinal::clmm(stage ~ Block + Divsersity + Week +
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
mod2 <- ordinal::clmm(stage ~ Block + Divsersity  + 
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
mod3 <- ordinal::clmm(stage ~ Block + Week  +
                        (1|ID_Rep) + (1|Population), 
                      data=data_multinom, weights=count)
anova(mod1,mod2) #sign
## Likelihood ratio tests of cumulative link models:
##  
##      formula:                                                            link:
## mod2 stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population)        logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
##      threshold:
## mod2 flexible  
## mod1 flexible  
## 
##      no.par   AIC  logLik LR.stat df Pr(>Chisq)    
## mod2      8 16875 -8429.7                          
## mod1      9 14842 -7412.2  2035.1  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod1,mod3) # non sign
## Likelihood ratio tests of cumulative link models:
##  
##      formula:                                                            link:
## mod3 stage ~ Block + Week + (1 | ID_Rep) + (1 | Population)              logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
##      threshold:
## mod3 flexible  
## mod1 flexible  
## 
##      no.par   AIC  logLik LR.stat df Pr(>Chisq)
## mod3      7 14838 -7412.2                      
## mod1      9 14842 -7412.2   0.127  2     0.9385

5 All analysis with He start or He

######### Proba of extinction 
#Prepare clean dataset
data_temp <- data[data$Generation_Eggs==6,c("Block","Population","He_start","Extinction_finalstatus")]
data_temp$Extinction_finalstatus <- as.factor(data_temp$Extinction_finalstatus)
# #### HERE ADD 
# data_temp$Extinction_finalstatus <- as.factor(data_temp$Extinction_finalstatus)
# 
# data_temp$y <- ifelse(data_temp$Extinction_finalstatus == "yes", 1, 0)
# 
# 
# mod0 <- glm(y ~ He_start + Block,  data = data_temp, family = binomial)
# mod1 <- glm(y ~ Block ,   data = data_temp, family = binomial)
# anova(mod1, mod0, test = "Chisq")
# summary(mod0)
# 



#########Time of extinction
mod1 <- glm(Generation_Eggs ~ He_start + Block,
                    data = data_timeextinction, family = "poisson")
mod0 <- glm(Generation_Eggs ~  Block,
                    data = data_timeextinction, family = "poisson")
anova(mod0, mod1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ He_start + Block
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        10    0.95459                     
## 2         9    0.76852  1  0.18607   0.6662
summary(mod1)
## 
## Call:
## glm(formula = Generation_Eggs ~ He_start + Block, family = "poisson", 
##     data = data_timeextinction)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.42430  -0.14557   0.03684   0.07219   0.50431  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   2.4209     1.5248   1.588    0.112
## He_start     -0.9243     2.1583  -0.428    0.668
## BlockBlock4  -0.1473     0.5277  -0.279    0.780
## BlockBlock5  -0.3318     0.4810  -0.690    0.490
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1.40752  on 12  degrees of freedom
## Residual deviance: 0.76852  on  9  degrees of freedom
## AIC: 53.687
## 
## Number of Fisher Scoring iterations: 4
#########Dynamic over time 
# Test effect
mod1 <- lme4::glmer(Nb_adults~Generation_Eggs*He_start + gen_square*He_start + 
                Block  + (1|obs) + (1|Population), 
            data = data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",], family = "poisson")
mod2 <- lme4::glmer(Nb_adults~Generation_Eggs + gen_square + He_start + 
                Block  + (1|obs) + (1|Population), 
            data = data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",], family = "poisson")


#Do not converge
#anova(mod1, mod2, test ="Chisq")  


#########Growth rate 
mod0 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod1 <- lme4::lmer(Lambda ~ Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
anova(mod0,mod1,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod1: Lambda ~ Block + (1 | Population)
## mod0: Lambda ~ He_end + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod1    5 532.53 548.66 -261.26   522.53                         
## mod0    6 521.90 541.26 -254.95   509.90 12.627  1  0.0003802 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#########Growth rate vs. He
mod3 <- lme4::lmer(Lambda ~ log(He_end) + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population), 
                   data=data_phenotyping[!is.na(data_phenotyping$He_end),])
anova(mod3,mod4,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod4: Lambda ~ 1 + Block + (1 | Population)
## mod3: Lambda ~ log(He_end) + Block + (1 | Population)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## mod4    5 532.53 548.66 -261.26   522.53                         
## mod3    6 522.65 542.01 -255.33   510.65 11.878  1   0.000568 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1